[llvm] 95401b0 - Revert "[x86] use zero-extending load of a byte outside of loops too"

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 19 14:37:36 PDT 2022


Author: Sanjay Patel
Date: 2022-07-19T17:37:22-04:00
New Revision: 95401b015393b350f826d097cc5b45b6a604dfa5

URL: https://github.com/llvm/llvm-project/commit/95401b015393b350f826d097cc5b45b6a604dfa5
DIFF: https://github.com/llvm/llvm-project/commit/95401b015393b350f826d097cc5b45b6a604dfa5.diff

LOG: Revert "[x86] use zero-extending load of a byte outside of loops too"

This reverts commit 9d1ea1774c51c44ddf0b5065bf600919988d7015.
There are tests of update_llc_test_checks.py that were not updated.
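
For context, the reverted patch changed X86FixupBWInsts to rewrite MOV8rm as
MOVZX32rm8 everywhere, not just in innermost loops. A minimal illustrative
sketch of the codegen difference this toggles (addressing and register choice
are arbitrary):

    movzbl (%rdi), %eax    # zero-extending byte load; writes all of %eax
    movb   (%rdi), %al     # plain byte load; upper bits of %eax unchanged

Per the comments in the diff below, the zero-extending form takes an extra
byte to encode but avoids a potential partial-register stall; the revert
restores the conservative innermost-loop-only heuristic.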

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86FixupBWInsts.cpp
    llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
    llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll
    llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll
    llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
    llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
    llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
    llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
    llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
    llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll
    llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
    llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll
    llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
    llvm/test/CodeGen/X86/PR40322.ll
    llvm/test/CodeGen/X86/abs.ll
    llvm/test/CodeGen/X86/add-sub-bool.ll
    llvm/test/CodeGen/X86/and-load-fold.ll
    llvm/test/CodeGen/X86/and-sink.ll
    llvm/test/CodeGen/X86/and-with-overflow.ll
    llvm/test/CodeGen/X86/arg-copy-elide.ll
    llvm/test/CodeGen/X86/atom-cmpb.ll
    llvm/test/CodeGen/X86/atomic-idempotent.ll
    llvm/test/CodeGen/X86/atomic-mi.ll
    llvm/test/CodeGen/X86/atomic-monotonic.ll
    llvm/test/CodeGen/X86/atomic-unordered.ll
    llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll
    llvm/test/CodeGen/X86/avoid-sfb.ll
    llvm/test/CodeGen/X86/avx512-calling-conv.ll
    llvm/test/CodeGen/X86/avx512-ext.ll
    llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
    llvm/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
    llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
    llvm/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/test/CodeGen/X86/avx512-load-store.ll
    llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll
    llvm/test/CodeGen/X86/avx512-mask-op.ll
    llvm/test/CodeGen/X86/avx512-select.ll
    llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll
    llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
    llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
    llvm/test/CodeGen/X86/bitcast-vector-bool.ll
    llvm/test/CodeGen/X86/bitreverse.ll
    llvm/test/CodeGen/X86/bmi.ll
    llvm/test/CodeGen/X86/bool-math.ll
    llvm/test/CodeGen/X86/bool-vector.ll
    llvm/test/CodeGen/X86/brcond.ll
    llvm/test/CodeGen/X86/bt.ll
    llvm/test/CodeGen/X86/btc_bts_btr.ll
    llvm/test/CodeGen/X86/byval5.ll
    llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll
    llvm/test/CodeGen/X86/clear-highbits.ll
    llvm/test/CodeGen/X86/clear-lowbits.ll
    llvm/test/CodeGen/X86/clz.ll
    llvm/test/CodeGen/X86/cmov.ll
    llvm/test/CodeGen/X86/cmovcmov.ll
    llvm/test/CodeGen/X86/combine-andintoload.ll
    llvm/test/CodeGen/X86/combine-bswap.ll
    llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
    llvm/test/CodeGen/X86/copy-eflags.ll
    llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
    llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
    llvm/test/CodeGen/X86/divide-by-constant.ll
    llvm/test/CodeGen/X86/divrem8_ext.ll
    llvm/test/CodeGen/X86/emutls.ll
    llvm/test/CodeGen/X86/extract-bits.ll
    llvm/test/CodeGen/X86/extract-insert.ll
    llvm/test/CodeGen/X86/extract-lowbits.ll
    llvm/test/CodeGen/X86/extractelement-index.ll
    llvm/test/CodeGen/X86/fast-isel-call-bool.ll
    llvm/test/CodeGen/X86/fast-isel-i1.ll
    llvm/test/CodeGen/X86/fast-isel-sext-zext.ll
    llvm/test/CodeGen/X86/fixup-bw-copy.ll
    llvm/test/CodeGen/X86/fixup-bw-inst.ll
    llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
    llvm/test/CodeGen/X86/fold-and-shift.ll
    llvm/test/CodeGen/X86/fp-intrinsics.ll
    llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
    llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
    llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
    llvm/test/CodeGen/X86/fp80-strict-scalar.ll
    llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
    llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
    llvm/test/CodeGen/X86/fshl.ll
    llvm/test/CodeGen/X86/fshr.ll
    llvm/test/CodeGen/X86/funnel-shift-rot.ll
    llvm/test/CodeGen/X86/funnel-shift.ll
    llvm/test/CodeGen/X86/gpr-to-mask.ll
    llvm/test/CodeGen/X86/h-register-addressing-32.ll
    llvm/test/CodeGen/X86/h-register-addressing-64.ll
    llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
    llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
    llvm/test/CodeGen/X86/iabs.ll
    llvm/test/CodeGen/X86/inc-of-add.ll
    llvm/test/CodeGen/X86/insertelement-var-index.ll
    llvm/test/CodeGen/X86/isel-sink2.ll
    llvm/test/CodeGen/X86/legalize-shift-64.ll
    llvm/test/CodeGen/X86/lifetime-alias.ll
    llvm/test/CodeGen/X86/load-local-v3i1.ll
    llvm/test/CodeGen/X86/load-local-v4i5.ll
    llvm/test/CodeGen/X86/load-scalar-as-vector.ll
    llvm/test/CodeGen/X86/masked_gather_scatter.ll
    llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
    llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
    llvm/test/CodeGen/X86/memcmp-x32.ll
    llvm/test/CodeGen/X86/memcmp.ll
    llvm/test/CodeGen/X86/memcpy.ll
    llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
    llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
    llvm/test/CodeGen/X86/midpoint-int.ll
    llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll
    llvm/test/CodeGen/X86/movmsk-cmp.ll
    llvm/test/CodeGen/X86/musttail-varargs.ll
    llvm/test/CodeGen/X86/neg-abs.ll
    llvm/test/CodeGen/X86/negate-i1.ll
    llvm/test/CodeGen/X86/oddshuffles.ll
    llvm/test/CodeGen/X86/or-with-overflow.ll
    llvm/test/CodeGen/X86/packed_struct.ll
    llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
    llvm/test/CodeGen/X86/popcnt.ll
    llvm/test/CodeGen/X86/pr12360.ll
    llvm/test/CodeGen/X86/pr15267.ll
    llvm/test/CodeGen/X86/pr20011.ll
    llvm/test/CodeGen/X86/pr22473.ll
    llvm/test/CodeGen/X86/pr28824.ll
    llvm/test/CodeGen/X86/pr32345.ll
    llvm/test/CodeGen/X86/pr34292.ll
    llvm/test/CodeGen/X86/pr34381.ll
    llvm/test/CodeGen/X86/pr35765.ll
    llvm/test/CodeGen/X86/pr38539.ll
    llvm/test/CodeGen/X86/pr38743.ll
    llvm/test/CodeGen/X86/pr38795.ll
    llvm/test/CodeGen/X86/pr39926.ll
    llvm/test/CodeGen/X86/pr46527.ll
    llvm/test/CodeGen/X86/pr5145.ll
    llvm/test/CodeGen/X86/reduce-trunc-shl.ll
    llvm/test/CodeGen/X86/rot16.ll
    llvm/test/CodeGen/X86/rot32.ll
    llvm/test/CodeGen/X86/rotate.ll
    llvm/test/CodeGen/X86/rotate4.ll
    llvm/test/CodeGen/X86/sadd_sat.ll
    llvm/test/CodeGen/X86/sadd_sat_plus.ll
    llvm/test/CodeGen/X86/sadd_sat_vec.ll
    llvm/test/CodeGen/X86/sdiv_fix.ll
    llvm/test/CodeGen/X86/sdiv_fix_sat.ll
    llvm/test/CodeGen/X86/select.ll
    llvm/test/CodeGen/X86/setcc-combine.ll
    llvm/test/CodeGen/X86/setcc.ll
    llvm/test/CodeGen/X86/sext-trunc.ll
    llvm/test/CodeGen/X86/shift-amount-mod.ll
    llvm/test/CodeGen/X86/shift-and.ll
    llvm/test/CodeGen/X86/shift-bmi2.ll
    llvm/test/CodeGen/X86/shift-by-signext.ll
    llvm/test/CodeGen/X86/shift-coalesce.ll
    llvm/test/CodeGen/X86/shift-combine.ll
    llvm/test/CodeGen/X86/shift-double.ll
    llvm/test/CodeGen/X86/shift-i128.ll
    llvm/test/CodeGen/X86/shift-mask.ll
    llvm/test/CodeGen/X86/smul_fix.ll
    llvm/test/CodeGen/X86/smul_fix_sat.ll
    llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll
    llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/sshl_sat.ll
    llvm/test/CodeGen/X86/sshl_sat_vec.ll
    llvm/test/CodeGen/X86/ssub_sat.ll
    llvm/test/CodeGen/X86/ssub_sat_plus.ll
    llvm/test/CodeGen/X86/ssub_sat_vec.ll
    llvm/test/CodeGen/X86/store-narrow.ll
    llvm/test/CodeGen/X86/sttni.ll
    llvm/test/CodeGen/X86/sub-of-not.ll
    llvm/test/CodeGen/X86/swifterror.ll
    llvm/test/CodeGen/X86/tail-opts.ll
    llvm/test/CodeGen/X86/tls.ll
    llvm/test/CodeGen/X86/trunc-to-bool.ll
    llvm/test/CodeGen/X86/uadd_sat.ll
    llvm/test/CodeGen/X86/uadd_sat_plus.ll
    llvm/test/CodeGen/X86/uadd_sat_vec.ll
    llvm/test/CodeGen/X86/udiv_fix.ll
    llvm/test/CodeGen/X86/udiv_fix_sat.ll
    llvm/test/CodeGen/X86/umul_fix.ll
    llvm/test/CodeGen/X86/umul_fix_sat.ll
    llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
    llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
    llvm/test/CodeGen/X86/urem-power-of-two.ll
    llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
    llvm/test/CodeGen/X86/ushl_sat.ll
    llvm/test/CodeGen/X86/ushl_sat_vec.ll
    llvm/test/CodeGen/X86/usub_sat.ll
    llvm/test/CodeGen/X86/usub_sat_plus.ll
    llvm/test/CodeGen/X86/usub_sat_vec.ll
    llvm/test/CodeGen/X86/vec_setcc.ll
    llvm/test/CodeGen/X86/vector-sext.ll
    llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll
    llvm/test/CodeGen/X86/xchg-nofold.ll
    llvm/test/CodeGen/X86/xmulo.ll
    llvm/test/CodeGen/X86/xor-icmp.ll
    llvm/test/CodeGen/X86/xor-lea.ll
    llvm/test/CodeGen/X86/xor-with-overflow.ll
    llvm/test/CodeGen/X86/xor.ll
    llvm/test/CodeGen/X86/zext-logicop-shift-load.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index db69234161774..16bff201dd038 100644
--- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -393,12 +393,12 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI,
   switch (MI->getOpcode()) {
 
   case X86::MOV8rm:
-    // Replace 8-bit loads with the zero-extending version if not optimizing
-    // for size. The extending op is cheaper across a wide range of uarch and
-    // it avoids a potentially expensive partial register stall. It takes an
-    // extra byte to encode, however, so don't do this when optimizing for size.
-    if (!OptForSize)
-      return tryReplaceLoad(X86::MOVZX32rm8, MI);
+    // Only replace 8 bit loads with the zero extending versions if
+    // in an inner most loop and not optimizing for size. This takes
+    // an extra byte to encode, and provides limited performance upside.
+    if (MachineLoop *ML = MLI->getLoopFor(&MBB))
+      if (ML->begin() == ML->end() && !OptForSize)
+        return tryReplaceLoad(X86::MOVZX32rm8, MI);
     break;
 
   case X86::MOV16rm:

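A note on the restored guard above (a sketch for readers, not part of the
patch): MLI->getLoopFor(&MBB) returns the innermost MachineLoop containing
the block, and ML->begin() == ML->end() means that loop has no sub-loops,
i.e. the block sits in an innermost loop. The same check, written as a
standalone helper with a hypothetical name, using only the standard
MachineLoopInfo API:

    #include "llvm/CodeGen/MachineLoopInfo.h"
    using namespace llvm;

    // Hypothetical helper (not in the patch): true when MBB belongs to an
    // innermost loop, i.e. a loop with no sub-loops.
    static bool isInInnermostLoop(const MachineLoopInfo *MLI,
                                  const MachineBasicBlock &MBB) {
      if (const MachineLoop *ML = MLI->getLoopFor(&MBB))
        return ML->isInnermost(); // equivalent to ML->begin() == ML->end()
      return false;
    }
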
diff  --git a/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
index 6c4254314009f..54ccbf504a5c3 100644
--- a/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
+++ b/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -11,7 +11,7 @@ target triple = "i686-unknown-unknown"
 define i32 @test5(i32 %B, i8 %C) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    movl A, %eax
 ; CHECK-NEXT:    shldl %cl, %edx, %eax

diff  --git a/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll
index 16ef67724f883..61f97a0224c75 100644
--- a/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll
+++ b/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -10,7 +10,7 @@ define void @test() {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl A, %eax
 ; CHECK-NEXT:    movzwl 2(%eax), %eax
-; CHECK-NEXT:    movzbl B, %ecx
+; CHECK-NEXT:    movb B, %cl
 ; CHECK-NEXT:    movl C, %edx
 ; CHECK-NEXT:    andb $16, %cl
 ; CHECK-NEXT:    shll %cl, %edx

diff  --git a/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index 313fbbac05fd0..1fbba1639b963 100644
--- a/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -9,8 +9,8 @@ define void @handle_vector_size_attribute() nounwind {
 ; CHECK-NEXT:    cmpl $1, %eax
 ; CHECK-NEXT:    ja .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb77
-; CHECK-NEXT:    movzbl 0, %eax
-; CHECK-NEXT:    movzbl 0, %eax
+; CHECK-NEXT:    movb 0, %al
+; CHECK-NEXT:    movb 0, %al
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:  .LBB0_2: # %bb84

diff  --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index b0b8771a7d512..3c119c2f09dac 100644
--- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -69,11 +69,11 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
 ; CHECK-NEXT:    movq _PyUFunc_API at GOTPCREL(%rip), %rbp
 ; CHECK-NEXT:    movq (%rbp), %rax
 ; CHECK-NEXT:    callq *216(%rax)
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT:    movb {{[0-9]+}}(%rsp), %dl
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    je LBB0_11
 ; CHECK-NEXT:  ## %bb.7: ## %cond_false.i
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-NEXT:    movzbl %bl, %ecx
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    divb %dl
@@ -98,8 +98,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
 ; CHECK-NEXT:  LBB0_11: ## %cond_true.i
 ; CHECK-NEXT:    movl $4, %edi
 ; CHECK-NEXT:    callq _feraiseexcept
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-NEXT:    movb {{[0-9]+}}(%rsp), %dl
+; CHECK-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    je LBB0_14

diff  --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index 6d596195fe7f6..3a700db4b6721 100644
--- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -31,7 +31,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
 ; CHECK-NEXT:    .cfi_offset %ebx, -12
 ; CHECK-NEXT:    .cfi_offset %ebp, -8
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; CHECK-NEXT:    testb $1, %bl
 ; CHECK-NEXT:    je LBB0_25
 ; CHECK-NEXT:  ## %bb.1: ## %bb116.i

diff  --git a/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll b/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
index 50e2aac794a85..92cebeeccb3e7 100644
--- a/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
@@ -17,7 +17,7 @@ define void @testit63_entry_2E_ce() nounwind  {
 ; CHECK-NEXT:    movl %esp, %edi
 ; CHECK-NEXT:    movl $g1s63, %esi
 ; CHECK-NEXT:    rep;movsl (%esi), %es:(%edi)
-; CHECK-NEXT:    movzbl g1s63+62, %eax
+; CHECK-NEXT:    movb g1s63+62, %al
 ; CHECK-NEXT:    movb %al, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movzwl g1s63+60, %eax
 ; CHECK-NEXT:    movw %ax, {{[0-9]+}}(%esp)

diff  --git a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
index 53175413980f1..816ae23d2fe26 100644
--- a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
+++ b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -15,7 +15,7 @@ define i32 @func_44(i16 signext %p_46) nounwind {
 ; SOURCE-SCHED-NEXT:    xorl %ecx, %ecx
 ; SOURCE-SCHED-NEXT:    cmpl $2, %eax
 ; SOURCE-SCHED-NEXT:    setge %cl
-; SOURCE-SCHED-NEXT:    movzbl g_73, %edx
+; SOURCE-SCHED-NEXT:    movb g_73, %dl
 ; SOURCE-SCHED-NEXT:    xorl %eax, %eax
 ; SOURCE-SCHED-NEXT:    subb {{[0-9]+}}(%esp), %al
 ; SOURCE-SCHED-NEXT:    testb %dl, %dl

diff  --git a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index f782c530d9566..ca8df1b6084ae 100644
--- a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -1,44 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mcpu=core2 | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.4"
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
 
 define fastcc i32 @cli_magic_scandesc(ptr %in) nounwind ssp {
-; CHECK-LABEL: cli_magic_scandesc:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $72, %rsp
-; CHECK-NEXT:    movq __stack_chk_guard(%rip), %rax
-; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movzbl (%rsp), %eax
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
-; CHECK-NEXT:    movq (%rdi), %rdx
-; CHECK-NEXT:    movq 8(%rdi), %rsi
-; CHECK-NEXT:    movq %rdx, (%rsp)
-; CHECK-NEXT:    movq 24(%rdi), %rdx
-; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 16(%rdi), %rdx
-; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 32(%rdi), %rdx
-; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 40(%rdi), %rdx
-; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 48(%rdi), %rdx
-; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 56(%rdi), %rdx
-; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movb %al, (%rsp)
-; CHECK-NEXT:    movb %cl, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq __stack_chk_guard(%rip), %rax
-; CHECK-NEXT:    cmpq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    addq $72, %rsp
-; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB0_2: # %entry
-; CHECK-NEXT:    callq __stack_chk_fail at PLT
 entry:
   %a = alloca [64 x i8]
   %c = getelementptr inbounds [64 x i8], ptr %a, i64 0, i32 30
@@ -49,3 +15,10 @@ entry:
   store i8 %e, ptr %c, align 8
   ret i32 0
 }
+
+; CHECK: movq	___stack_chk_guard at GOTPCREL(%rip)
+; CHECK: movb   (%rsp), [[R1:%.+]]
+; CHECK: movb   30(%rsp), [[R0:%.+]]
+; CHECK: movb   [[R1]], (%rsp)
+; CHECK: movb   [[R0]], 30(%rsp)
+; CHECK: callq	___stack_chk_fail

diff  --git a/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll b/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll
index 1a8d33f5b3480..b8b3440f3aa54 100644
--- a/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll
+++ b/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll
@@ -233,7 +233,7 @@ define i8 @neg_type_mismatch(i32 %a1_wide_orig, i16 %a2_wide_orig, i32 %inc) nou
 define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounwind {
 ; I386-NOCMOV-LABEL: negative_CopyFromReg:
 ; I386-NOCMOV:       # %bb.0:
-; I386-NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; I386-NOCMOV-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; I386-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; I386-NOCMOV-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; I386-NOCMOV-NEXT:    cmpb %cl, %al
@@ -255,7 +255,7 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounw
 ;
 ; I686-NOCMOV-LABEL: negative_CopyFromReg:
 ; I686-NOCMOV:       # %bb.0:
-; I686-NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; I686-NOCMOV-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; I686-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; I686-NOCMOV-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; I686-NOCMOV-NEXT:    cmpb %cl, %al
@@ -297,8 +297,8 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounw
 define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {
 ; I386-NOCMOV-LABEL: negative_CopyFromRegs:
 ; I386-NOCMOV:       # %bb.0:
-; I386-NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; I386-NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; I386-NOCMOV-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; I386-NOCMOV-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; I386-NOCMOV-NEXT:    cmpb %cl, %al
 ; I386-NOCMOV-NEXT:    jg .LBB4_2
 ; I386-NOCMOV-NEXT:  # %bb.1:
@@ -317,8 +317,8 @@ define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {
 ;
 ; I686-NOCMOV-LABEL: negative_CopyFromRegs:
 ; I686-NOCMOV:       # %bb.0:
-; I686-NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; I686-NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; I686-NOCMOV-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; I686-NOCMOV-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; I686-NOCMOV-NEXT:    cmpb %cl, %al
 ; I686-NOCMOV-NEXT:    jg .LBB4_2
 ; I686-NOCMOV-NEXT:  # %bb.1:

diff  --git a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
index 33d4de16c9772..fe67dedff9cc3 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
@@ -324,7 +324,7 @@ define void @test_abi_exts_call(ptr %addr) {
 ; X32-NEXT:    .cfi_offset %esi, -12
 ; X32-NEXT:    .cfi_offset %ebx, -8
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movzbl (%eax), %ebx
+; X32-NEXT:    movb (%eax), %bl
 ; X32-NEXT:    movzbl %bl, %esi
 ; X32-NEXT:    movl %esi, (%esp)
 ; X32-NEXT:    calll take_char
@@ -346,7 +346,7 @@ define void @test_abi_exts_call(ptr %addr) {
 ; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    .cfi_def_cfa_offset 16
 ; X64-NEXT:    .cfi_offset %rbx, -16
-; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    movb (%rdi), %al
 ; X64-NEXT:    movzbl %al, %ebx
 ; X64-NEXT:    movl %ebx, %edi
 ; X64-NEXT:    callq take_char

diff  --git a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll
index f92537ad170ff..9ed66face45c9 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll
@@ -8,7 +8,7 @@ define i1 @test_load_i1(ptr %p1) {
 ; CHECK-LABEL: test_load_i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl 4(%esp), %eax
-; CHECK-NEXT:    movzbl (%eax), %eax
+; CHECK-NEXT:    movb (%eax), %al
 ; CHECK-NEXT:    retl
   %r = load i1, ptr %p1
   ret i1 %r
@@ -18,7 +18,7 @@ define i8 @test_load_i8(ptr %p1) {
 ; CHECK-LABEL: test_load_i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl 4(%esp), %eax
-; CHECK-NEXT:    movzbl (%eax), %eax
+; CHECK-NEXT:    movb (%eax), %al
 ; CHECK-NEXT:    retl
   %r = load i8, ptr %p1
   ret i8 %r

diff  --git a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
index 033672fb1fc21..38f619f4937ba 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
@@ -5,7 +5,7 @@
 define i1 @test_load_i1(ptr %p1) {
 ; ALL-LABEL: test_load_i1:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    movzbl (%rdi), %eax
+; ALL-NEXT:    movb (%rdi), %al
 ; ALL-NEXT:    retq
   %r = load i1, ptr %p1
   ret i1 %r
@@ -14,7 +14,7 @@ define i1 @test_load_i1(ptr %p1) {
 define i8 @test_load_i8(ptr %p1) {
 ; ALL-LABEL: test_load_i8:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    movzbl (%rdi), %eax
+; ALL-NEXT:    movb (%rdi), %al
 ; ALL-NEXT:    retq
   %r = load i8, ptr %p1
   ret i8 %r

diff  --git a/llvm/test/CodeGen/X86/PR40322.ll b/llvm/test/CodeGen/X86/PR40322.ll
index 49709cb9b88f8..298d827ea43a8 100644
--- a/llvm/test/CodeGen/X86/PR40322.ll
+++ b/llvm/test/CodeGen/X86/PR40322.ll
@@ -15,7 +15,7 @@ define void @_Z2ami(i32) #0 personality ptr @__gxx_personality_v0 {
 ; CHECK-MINGW-X86-NEXT:    .cfi_def_cfa_offset 12
 ; CHECK-MINGW-X86-NEXT:    .cfi_offset %esi, -12
 ; CHECK-MINGW-X86-NEXT:    .cfi_offset %edi, -8
-; CHECK-MINGW-X86-NEXT:    movzbl __ZGVZ2amiE2au, %eax
+; CHECK-MINGW-X86-NEXT:    movb __ZGVZ2amiE2au, %al
 ; CHECK-MINGW-X86-NEXT:    testb %al, %al
 ; CHECK-MINGW-X86-NEXT:    jne LBB0_4
 ; CHECK-MINGW-X86-NEXT:  # %bb.1: # %init.check

diff  --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index 80a4eb6ccba14..b8264835cc01e 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -35,7 +35,7 @@ define i8 @test_i8(i8 %a) nounwind {
 ;
 ; X86-LABEL: test_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    sarb $7, %cl
 ; X86-NEXT:    xorb %cl, %al
@@ -530,13 +530,13 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
 ; X86-NEXT:    xorb %al, %bh
 ; X86-NEXT:    subb %al, %bh
 ; X86-NEXT:    movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    sarb $7, %al
 ; X86-NEXT:    xorb %al, %cl
 ; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    sarb $7, %al
 ; X86-NEXT:    xorb %al, %cl
@@ -572,7 +572,7 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
 ; X86-NEXT:    sarb $7, %al
 ; X86-NEXT:    xorb %al, %cl
 ; X86-NEXT:    subb %al, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movb %al, %ah
 ; X86-NEXT:    sarb $7, %ah
 ; X86-NEXT:    xorb %ah, %al
@@ -585,23 +585,23 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
 ; X86-NEXT:    movb %dh, 11(%esi)
 ; X86-NEXT:    movb %bl, 10(%esi)
 ; X86-NEXT:    movb %bh, 9(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, 8(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, 7(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, 6(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, 5(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, 4(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, 3(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, 2(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, 1(%esi)
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; X86-NEXT:    movb %al, (%esi)
 ; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    addl $12, %esp

diff  --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index 17eda59660193..248a9f19f1fbe 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -390,7 +390,7 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -529,7 +529,7 @@ define i32 @test_i32_sub_add_sext_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X86-LABEL: test_i32_sub_add_sext_var:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shll %cl, %edx
 ; X86-NEXT:    sarl $31, %edx

diff  --git a/llvm/test/CodeGen/X86/and-load-fold.ll b/llvm/test/CodeGen/X86/and-load-fold.ll
index b0819cd288775..f01c8b0526fe4 100644
--- a/llvm/test/CodeGen/X86/and-load-fold.ll
+++ b/llvm/test/CodeGen/X86/and-load-fold.ll
@@ -6,7 +6,7 @@
 define i8 @foo(ptr %V) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl 2(%rdi), %eax
+; CHECK-NEXT:    movb 2(%rdi), %al
 ; CHECK-NEXT:    andb $95, %al
 ; CHECK-NEXT:    retq
   %V3i8 = load <3 x i8>, ptr %V, align 4

diff  --git a/llvm/test/CodeGen/X86/and-sink.ll b/llvm/test/CodeGen/X86/and-sink.ll
index 002d1cdf271b9..ccf35be439c34 100644
--- a/llvm/test/CodeGen/X86/and-sink.ll
+++ b/llvm/test/CodeGen/X86/and-sink.ll
@@ -51,7 +51,7 @@ define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
 ; CHECK-NEXT:    testb $1, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    je .LBB1_5
 ; CHECK-NEXT:  # %bb.1: # %bb0.preheader
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB1_2: # %bb0

diff  --git a/llvm/test/CodeGen/X86/and-with-overflow.ll b/llvm/test/CodeGen/X86/and-with-overflow.ll
index a63f6cc6ea7e2..aba73de1e7719 100644
--- a/llvm/test/CodeGen/X86/and-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/and-with-overflow.ll
@@ -9,7 +9,7 @@
 define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) {
 ; X86-LABEL: and_i8_ri:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    andb $-17, %cl
 ; X86-NEXT:    je .LBB0_2
@@ -35,8 +35,8 @@ define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) {
 define i8 @and_i8_rr(i8 zeroext %0, i8 zeroext %1) {
 ; X86-LABEL: and_i8_rr:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb %al, %cl
 ; X86-NEXT:    je .LBB1_2
 ; X86-NEXT:  # %bb.1:

diff  --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll
index 9d57c9cb2b423..1d3183e6e4dbc 100644
--- a/llvm/test/CodeGen/X86/arg-copy-elide.ll
+++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll
@@ -74,7 +74,7 @@ define i1 @i1_arg(i1 %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %eax
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; CHECK-NEXT:    movl %ebx, %eax
 ; CHECK-NEXT:    andb $1, %al
 ; CHECK-NEXT:    movb %al, {{[0-9]+}}(%esp)
@@ -403,9 +403,9 @@ define i1 @use_i3(i3 %a1, i3 %a2) {
 ; CHECK-LABEL: use_i3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushl %eax
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK-NEXT:    andb $7, %al
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK-NEXT:    andb $7, %cl
 ; CHECK-NEXT:    movb %cl, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    cmpb %cl, %al

diff  --git a/llvm/test/CodeGen/X86/atom-cmpb.ll b/llvm/test/CodeGen/X86/atom-cmpb.ll
index 46ac6e416738f..b21a1fe63be7b 100644
--- a/llvm/test/CodeGen/X86/atom-cmpb.ll
+++ b/llvm/test/CodeGen/X86/atom-cmpb.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=i686-- -mcpu=atom | FileCheck %s
 ; CHECK:        movl
-; CHECK:        movzbl
-; CHECK:        movzbl
+; CHECK:        movb
+; CHECK:        movb
 ; CHECK:        cmpb
 ; CHECK:        notb
 ; CHECK:        notb

diff  --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index 714f2912086cc..19a3de4875516 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -15,14 +15,14 @@ define i8 @add8(ptr %p) {
 ; X64-LABEL: add8:
 ; X64:       # %bb.0:
 ; X64-NEXT:    mfence
-; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    movb (%rdi), %al
 ; X64-NEXT:    retq
 ;
 ; X86-SSE2-LABEL: add8:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    mfence
-; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    movb (%eax), %al
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-SLM-LABEL: add8:

diff  --git a/llvm/test/CodeGen/X86/atomic-mi.ll b/llvm/test/CodeGen/X86/atomic-mi.ll
index 0d0108f55f2ab..7077f4b12d923 100644
--- a/llvm/test/CodeGen/X86/atomic-mi.ll
+++ b/llvm/test/CodeGen/X86/atomic-mi.ll
@@ -182,7 +182,7 @@ define void @add_8r(ptr %p, i8 %v) {
 ;
 ; X32-LABEL: add_8r:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addb %al, (%ecx)
 ; X32-NEXT:    retl
@@ -437,7 +437,7 @@ define void @sub_8r(ptr %p, i8 %v) {
 ;
 ; X32-LABEL: sub_8r:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subb %al, (%ecx)
 ; X32-NEXT:    retl
@@ -616,7 +616,7 @@ define void @and_8r(ptr %p, i8 %v) {
 ;
 ; X32-LABEL: and_8r:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    andb %al, (%ecx)
 ; X32-NEXT:    retl
@@ -837,7 +837,7 @@ define void @or_8r(ptr %p, i8 %v) {
 ;
 ; X32-LABEL: or_8r:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    orb %al, (%ecx)
 ; X32-NEXT:    retl
@@ -1059,7 +1059,7 @@ define void @xor_8r(ptr %p, i8 %v) {
 ;
 ; X32-LABEL: xor_8r:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    xorb %al, (%ecx)
 ; X32-NEXT:    retl

diff  --git a/llvm/test/CodeGen/X86/atomic-monotonic.ll b/llvm/test/CodeGen/X86/atomic-monotonic.ll
index 963825ec48e40..d82e7191803d5 100644
--- a/llvm/test/CodeGen/X86/atomic-monotonic.ll
+++ b/llvm/test/CodeGen/X86/atomic-monotonic.ll
@@ -3,15 +3,10 @@
 ; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefixes=CHECK,CHECK-O3 %s
 
 define i8 @load_i8(ptr %ptr) {
-; CHECK-O0-LABEL: load_i8:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movb (%rdi), %al
-; CHECK-O0-NEXT:    retq
-;
-; CHECK-O3-LABEL: load_i8:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movzbl (%rdi), %eax
-; CHECK-O3-NEXT:    retq
+; CHECK-LABEL: load_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movb (%rdi), %al
+; CHECK-NEXT:    retq
   %v = load atomic i8, ptr %ptr monotonic, align 1
   ret i8 %v
 }

diff  --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index 88ad8c42ff252..0162a0e66ec3c 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -5,15 +5,10 @@
 ; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
 
 define i8 @load_i8(i8* %ptr) {
-; CHECK-O0-LABEL: load_i8:
-; CHECK-O0:       # %bb.0:
-; CHECK-O0-NEXT:    movb (%rdi), %al
-; CHECK-O0-NEXT:    retq
-;
-; CHECK-O3-LABEL: load_i8:
-; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movzbl (%rdi), %eax
-; CHECK-O3-NEXT:    retq
+; CHECK-LABEL: load_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movb (%rdi), %al
+; CHECK-NEXT:    retq
   %v = load atomic i8, i8* %ptr unordered, align 1
   ret i8 %v
 }

diff  --git a/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll b/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll
index da9d16ca9cf72..b23c26800b298 100644
--- a/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll
+++ b/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll
@@ -30,9 +30,9 @@ define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-NEXT:    movl %eax, 24(%rdi)
 ; CHECK-NEXT:    movzwl -4(%rdi), %eax
 ; CHECK-NEXT:    movw %ax, 28(%rdi)
-; CHECK-NEXT:    movzbl -2(%rdi), %eax
+; CHECK-NEXT:    movb -2(%rdi), %al
 ; CHECK-NEXT:    movb %al, 30(%rdi)
-; CHECK-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-NEXT:    movb -1(%rdi), %al
 ; CHECK-NEXT:    movb %al, 31(%rdi)
 ; CHECK-NEXT:    retq
 ;
@@ -68,9 +68,9 @@ define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
 ; CHECK-AVX2-NEXT:    movzwl -4(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movw %ax, 28(%rdi)
-; CHECK-AVX2-NEXT:    movzbl -2(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb -2(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
-; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
 ; CHECK-AVX2-NEXT:    retq
 ;
@@ -93,9 +93,9 @@ define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
 ; CHECK-AVX512-NEXT:    movzwl -4(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movw %ax, 28(%rdi)
-; CHECK-AVX512-NEXT:    movzbl -2(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb -2(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
-; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
 ; CHECK-AVX512-NEXT:    retq
 entry:
@@ -223,9 +223,9 @@ define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-NEXT:    movw %ax, 24(%rdi)
 ; CHECK-NEXT:    movl -6(%rdi), %eax
 ; CHECK-NEXT:    movl %eax, 26(%rdi)
-; CHECK-NEXT:    movzbl -2(%rdi), %eax
+; CHECK-NEXT:    movb -2(%rdi), %al
 ; CHECK-NEXT:    movb %al, 30(%rdi)
-; CHECK-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-NEXT:    movb -1(%rdi), %al
 ; CHECK-NEXT:    movb %al, 31(%rdi)
 ; CHECK-NEXT:    retq
 ;
@@ -265,9 +265,9 @@ define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX2-NEXT:    movw %ax, 24(%rdi)
 ; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movl %eax, 26(%rdi)
-; CHECK-AVX2-NEXT:    movzbl -2(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb -2(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
-; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
 ; CHECK-AVX2-NEXT:    retq
 ;
@@ -294,9 +294,9 @@ define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX512-NEXT:    movw %ax, 24(%rdi)
 ; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movl %eax, 26(%rdi)
-; CHECK-AVX512-NEXT:    movzbl -2(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb -2(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
-; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
 ; CHECK-AVX512-NEXT:    retq
 entry:
@@ -327,7 +327,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-NEXT:    movl $0, -11(%rdi)
 ; CHECK-NEXT:    movl -16(%rdi), %eax
 ; CHECK-NEXT:    movl %eax, 16(%rdi)
-; CHECK-NEXT:    movzbl -12(%rdi), %eax
+; CHECK-NEXT:    movb -12(%rdi), %al
 ; CHECK-NEXT:    movb %al, 20(%rdi)
 ; CHECK-NEXT:    movl -11(%rdi), %eax
 ; CHECK-NEXT:    movl %eax, 21(%rdi)
@@ -335,7 +335,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-NEXT:    movl %eax, 25(%rdi)
 ; CHECK-NEXT:    movzwl -3(%rdi), %eax
 ; CHECK-NEXT:    movw %ax, 29(%rdi)
-; CHECK-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-NEXT:    movb -1(%rdi), %al
 ; CHECK-NEXT:    movb %al, 31(%rdi)
 ; CHECK-NEXT:    retq
 ;
@@ -361,7 +361,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX2-NEXT:    movl $0, -11(%rdi)
 ; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
-; CHECK-AVX2-NEXT:    movzbl -12(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb -12(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 20(%rdi)
 ; CHECK-AVX2-NEXT:    movl -11(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movl %eax, 21(%rdi)
@@ -369,7 +369,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX2-NEXT:    movl %eax, 25(%rdi)
 ; CHECK-AVX2-NEXT:    movzwl -3(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movw %ax, 29(%rdi)
-; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
 ; CHECK-AVX2-NEXT:    retq
 ;
@@ -383,7 +383,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX512-NEXT:    movl $0, -11(%rdi)
 ; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
-; CHECK-AVX512-NEXT:    movzbl -12(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb -12(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 20(%rdi)
 ; CHECK-AVX512-NEXT:    movl -11(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movl %eax, 21(%rdi)
@@ -391,7 +391,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX512-NEXT:    movl %eax, 25(%rdi)
 ; CHECK-AVX512-NEXT:    movzwl -3(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movw %ax, 29(%rdi)
-; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
 ; CHECK-AVX512-NEXT:    retq
 entry:
@@ -420,11 +420,11 @@ define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-NEXT:    movb $0, -11(%rdi)
 ; CHECK-NEXT:    movzwl -16(%rdi), %eax
 ; CHECK-NEXT:    movw %ax, 16(%rdi)
-; CHECK-NEXT:    movzbl -14(%rdi), %eax
+; CHECK-NEXT:    movb -14(%rdi), %al
 ; CHECK-NEXT:    movb %al, 18(%rdi)
 ; CHECK-NEXT:    movzwl -13(%rdi), %eax
 ; CHECK-NEXT:    movw %ax, 19(%rdi)
-; CHECK-NEXT:    movzbl -11(%rdi), %eax
+; CHECK-NEXT:    movb -11(%rdi), %al
 ; CHECK-NEXT:    movb %al, 21(%rdi)
 ; CHECK-NEXT:    movq -10(%rdi), %rax
 ; CHECK-NEXT:    movq %rax, 22(%rdi)
@@ -454,11 +454,11 @@ define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX2-NEXT:    movb $0, -11(%rdi)
 ; CHECK-AVX2-NEXT:    movzwl -16(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movw %ax, 16(%rdi)
-; CHECK-AVX2-NEXT:    movzbl -14(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb -14(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 18(%rdi)
 ; CHECK-AVX2-NEXT:    movzwl -13(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movw %ax, 19(%rdi)
-; CHECK-AVX2-NEXT:    movzbl -11(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb -11(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 21(%rdi)
 ; CHECK-AVX2-NEXT:    movq -10(%rdi), %rax
 ; CHECK-AVX2-NEXT:    movq %rax, 22(%rdi)
@@ -476,11 +476,11 @@ define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_ad
 ; CHECK-AVX512-NEXT:    movb $0, -11(%rdi)
 ; CHECK-AVX512-NEXT:    movzwl -16(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movw %ax, 16(%rdi)
-; CHECK-AVX512-NEXT:    movzbl -14(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb -14(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 18(%rdi)
 ; CHECK-AVX512-NEXT:    movzwl -13(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movw %ax, 19(%rdi)
-; CHECK-AVX512-NEXT:    movzbl -11(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb -11(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 21(%rdi)
 ; CHECK-AVX512-NEXT:    movq -10(%rdi), %rax
 ; CHECK-AVX512-NEXT:    movq %rax, 22(%rdi)

diff  --git a/llvm/test/CodeGen/X86/avoid-sfb.ll b/llvm/test/CodeGen/X86/avoid-sfb.ll
index 9929c8839797d..e43fdfca46813 100644
--- a/llvm/test/CodeGen/X86/avoid-sfb.ll
+++ b/llvm/test/CodeGen/X86/avoid-sfb.ll
@@ -436,13 +436,13 @@ define void @test_mixed_type(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %
 ; CHECK-NEXT:  .LBB5_2: # %if.end
 ; CHECK-NEXT:    movq (%rdi), %rax
 ; CHECK-NEXT:    movq %rax, (%rsi)
-; CHECK-NEXT:    movzbl 8(%rdi), %eax
+; CHECK-NEXT:    movb 8(%rdi), %al
 ; CHECK-NEXT:    movb %al, 8(%rsi)
 ; CHECK-NEXT:    movl 9(%rdi), %eax
 ; CHECK-NEXT:    movl %eax, 9(%rsi)
 ; CHECK-NEXT:    movzwl 13(%rdi), %eax
 ; CHECK-NEXT:    movw %ax, 13(%rsi)
-; CHECK-NEXT:    movzbl 15(%rdi), %eax
+; CHECK-NEXT:    movb 15(%rdi), %al
 ; CHECK-NEXT:    movb %al, 15(%rsi)
 ; CHECK-NEXT:    retq
 ;
@@ -470,13 +470,13 @@ define void @test_mixed_type(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %
 ; CHECK-AVX2-NEXT:  .LBB5_2: # %if.end
 ; CHECK-AVX2-NEXT:    movq (%rdi), %rax
 ; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
-; CHECK-AVX2-NEXT:    movzbl 8(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb 8(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 8(%rsi)
 ; CHECK-AVX2-NEXT:    movl 9(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movl %eax, 9(%rsi)
 ; CHECK-AVX2-NEXT:    movzwl 13(%rdi), %eax
 ; CHECK-AVX2-NEXT:    movw %ax, 13(%rsi)
-; CHECK-AVX2-NEXT:    movzbl 15(%rdi), %eax
+; CHECK-AVX2-NEXT:    movb 15(%rdi), %al
 ; CHECK-AVX2-NEXT:    movb %al, 15(%rsi)
 ; CHECK-AVX2-NEXT:    retq
 ;
@@ -491,13 +491,13 @@ define void @test_mixed_type(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %
 ; CHECK-AVX512-NEXT:  .LBB5_2: # %if.end
 ; CHECK-AVX512-NEXT:    movq (%rdi), %rax
 ; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
-; CHECK-AVX512-NEXT:    movzbl 8(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb 8(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 8(%rsi)
 ; CHECK-AVX512-NEXT:    movl 9(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movl %eax, 9(%rsi)
 ; CHECK-AVX512-NEXT:    movzwl 13(%rdi), %eax
 ; CHECK-AVX512-NEXT:    movw %ax, 13(%rsi)
-; CHECK-AVX512-NEXT:    movzbl 15(%rdi), %eax
+; CHECK-AVX512-NEXT:    movb 15(%rdi), %al
 ; CHECK-AVX512-NEXT:    movb %al, 15(%rsi)
 ; CHECK-AVX512-NEXT:    retq
 entry:

diff  --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index 7a5baf15fe845..055726f0c324e 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -687,10 +687,10 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT:    cmovnel %eax, %r10d
 ; KNL-NEXT:    movq %rdi, %rax
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    andl $1, %edi
 ; KNL-NEXT:    kmovw %edi, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $14, %k1, %k1
@@ -699,7 +699,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $13, %k1, %k1
@@ -708,7 +708,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $12, %k1, %k1
@@ -717,7 +717,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $11, %k1, %k1
@@ -726,7 +726,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $10, %k1, %k1
@@ -735,7 +735,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $9, %k1, %k1
@@ -744,7 +744,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $8, %k1, %k1
@@ -753,7 +753,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $7, %k1, %k1
@@ -761,7 +761,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    movw $-513, %di ## imm = 0xFDFF
 ; KNL-NEXT:    kmovw %edi, %k7
 ; KNL-NEXT:    kandw %k7, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $6, %k1, %k1
@@ -769,7 +769,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    movw $-1025, %di ## imm = 0xFBFF
 ; KNL-NEXT:    kmovw %edi, %k4
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $5, %k1, %k1
@@ -777,7 +777,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    movw $-2049, %di ## imm = 0xF7FF
 ; KNL-NEXT:    kmovw %edi, %k3
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $4, %k1, %k1
@@ -785,7 +785,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    movw $-4097, %di ## imm = 0xEFFF
 ; KNL-NEXT:    kmovw %edi, %k2
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $3, %k1, %k1
@@ -793,7 +793,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    movw $-8193, %di ## imm = 0xDFFF
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k5
 ; KNL-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL-NEXT:    kshiftrw $2, %k5, %k5
@@ -801,13 +801,13 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    movw $-16385, %di ## imm = 0xBFFF
 ; KNL-NEXT:    kmovw %edi, %k0
 ; KNL-NEXT:    kandw %k0, %k5, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kshiftlw $1, %k5, %k5
 ; KNL-NEXT:    kshiftrw $1, %k5, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
@@ -838,70 +838,70 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k5, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k5, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k5, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k5, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kandw %k7, %k5, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $6, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kandw %k4, %k5, %k4
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k5
 ; KNL-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL-NEXT:    kshiftrw $5, %k5, %k5
 ; KNL-NEXT:    korw %k5, %k4, %k4
 ; KNL-NEXT:    kandw %k3, %k4, %k3
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k4
 ; KNL-NEXT:    kshiftlw $15, %k4, %k4
 ; KNL-NEXT:    kshiftrw $4, %k4, %k4
 ; KNL-NEXT:    korw %k4, %k3, %k3
 ; KNL-NEXT:    kandw %k2, %k3, %k2
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k3
 ; KNL-NEXT:    kshiftlw $15, %k3, %k3
 ; KNL-NEXT:    kshiftrw $3, %k3, %k3
 ; KNL-NEXT:    korw %k3, %k2, %k2
 ; KNL-NEXT:    kandw %k1, %k2, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k2
 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL-NEXT:    kshiftrw $2, %k2, %k2
 ; KNL-NEXT:    korw %k2, %k1, %k1
 ; KNL-NEXT:    kandw %k0, %k1, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k1
 ; KNL-NEXT:    kshiftlw $14, %k1, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k0
@@ -1323,10 +1323,10 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    cmovnel %edx, %ecx
 ; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
 ; KNL_X32-NEXT:    cmovnel %edx, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    andl $1, %edx
 ; KNL_X32-NEXT:    kmovw %edx, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $14, %k1, %k1
@@ -1335,7 +1335,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $13, %k1, %k1
@@ -1344,7 +1344,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $12, %k1, %k1
@@ -1353,7 +1353,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $11, %k1, %k1
@@ -1362,7 +1362,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $10, %k1, %k1
@@ -1371,7 +1371,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $9, %k1, %k1
@@ -1380,7 +1380,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $8, %k1, %k1
@@ -1389,7 +1389,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $7, %k1, %k1
@@ -1397,7 +1397,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    movw $-513, %dx ## imm = 0xFDFF
 ; KNL_X32-NEXT:    kmovw %edx, %k7
 ; KNL_X32-NEXT:    kandw %k7, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $6, %k1, %k1
@@ -1405,7 +1405,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    movw $-1025, %dx ## imm = 0xFBFF
 ; KNL_X32-NEXT:    kmovw %edx, %k4
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $5, %k1, %k1
@@ -1413,7 +1413,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    movw $-2049, %dx ## imm = 0xF7FF
 ; KNL_X32-NEXT:    kmovw %edx, %k3
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $4, %k1, %k1
@@ -1421,7 +1421,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    movw $-4097, %dx ## imm = 0xEFFF
 ; KNL_X32-NEXT:    kmovw %edx, %k2
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $3, %k1, %k1
@@ -1429,7 +1429,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    movw $-8193, %dx ## imm = 0xDFFF
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k5
 ; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $2, %k5, %k5
@@ -1437,20 +1437,20 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    movw $-16385, %dx ## imm = 0xBFFF
 ; KNL_X32-NEXT:    kmovw %edx, %k0
 ; KNL_X32-NEXT:    kandw %k0, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kshiftlw $1, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $1, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw %k5, (%esp) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    andl $1, %edx
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; KNL_X32-NEXT:    kmovw %ebx, %k5
 ; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $14, %k5, %k5
@@ -1458,91 +1458,91 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL_X32-NEXT:    korw %k5, %k6, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL_X32-NEXT:    kandw %k6, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kandw %k7, %k5, %k5
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $6, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k5, %k5
 ; KNL_X32-NEXT:    kandw %k4, %k5, %k4
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k5
 ; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $5, %k5, %k5
 ; KNL_X32-NEXT:    korw %k5, %k4, %k4
 ; KNL_X32-NEXT:    kandw %k3, %k4, %k3
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k4
 ; KNL_X32-NEXT:    kshiftlw $15, %k4, %k4
 ; KNL_X32-NEXT:    kshiftrw $4, %k4, %k4
 ; KNL_X32-NEXT:    korw %k4, %k3, %k3
 ; KNL_X32-NEXT:    kandw %k2, %k3, %k2
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k3
 ; KNL_X32-NEXT:    kshiftlw $15, %k3, %k3
 ; KNL_X32-NEXT:    kshiftrw $3, %k3, %k3
 ; KNL_X32-NEXT:    korw %k3, %k2, %k2
 ; KNL_X32-NEXT:    kandw %k1, %k2, %k1
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k2
 ; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL_X32-NEXT:    kshiftrw $2, %k2, %k2
 ; KNL_X32-NEXT:    korw %k2, %k1, %k1
 ; KNL_X32-NEXT:    kandw %k0, %k1, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $14, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
 ; KNL_X32-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL_X32-NEXT:    kshiftrw $1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; KNL_X32-NEXT:    kmovw %edx, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    korw %k1, %k0, %k0
@@ -1957,10 +1957,10 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL-LABEL: test17:
 ; KNL:       ## %bb.0:
 ; KNL-NEXT:    movq %rdi, %rax
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    andl $1, %edi
 ; KNL-NEXT:    kmovw %edi, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $14, %k1, %k1
@@ -1968,7 +1968,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL-NEXT:    movw $-5, %di
 ; KNL-NEXT:    kmovw %edi, %k1
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k2
 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL-NEXT:    kshiftrw $13, %k2, %k2
@@ -1976,7 +1976,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL-NEXT:    movw $-9, %di
 ; KNL-NEXT:    kmovw %edi, %k2
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k3
 ; KNL-NEXT:    kshiftlw $15, %k3, %k3
 ; KNL-NEXT:    kshiftrw $12, %k3, %k3
@@ -1984,7 +1984,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL-NEXT:    movw $-17, %di
 ; KNL-NEXT:    kmovw %edi, %k3
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k4
 ; KNL-NEXT:    kshiftlw $15, %k4, %k4
 ; KNL-NEXT:    kshiftrw $11, %k4, %k4
@@ -1992,7 +1992,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL-NEXT:    movw $-33, %di
 ; KNL-NEXT:    kmovw %edi, %k4
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k5
 ; KNL-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL-NEXT:    kshiftrw $10, %k5, %k5
@@ -2000,241 +2000,241 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL-NEXT:    movw $-65, %di
 ; KNL-NEXT:    kmovw %edi, %k5
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    andl $1, %edi
 ; KNL-NEXT:    kmovw %edi, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $14, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; KNL-NEXT:    andl $1, %r10d
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k0
 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL-NEXT:    kmovw %r10d, %k6
 ; KNL-NEXT:    korw %k0, %k6, %k0
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; KNL-NEXT:    andl $1, %r10d
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k0
 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL-NEXT:    kmovw %r10d, %k6
 ; KNL-NEXT:    korw %k0, %k6, %k0
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; KNL-NEXT:    andl $1, %r10d
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k0
 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL-NEXT:    kmovw %r10d, %k6
 ; KNL-NEXT:    korw %k0, %k6, %k0
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; KNL-NEXT:    andl $1, %r10d
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k0
 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL-NEXT:    kmovw %r10d, %k6
 ; KNL-NEXT:    korw %k0, %k6, %k0
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; KNL-NEXT:    andl $1, %r10d
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k0
 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL-NEXT:    kmovw %r10d, %k6
 ; KNL-NEXT:    korw %k0, %k6, %k0
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
@@ -2262,51 +2262,51 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL-NEXT:    kshiftrw $11, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $10, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k0, %k0
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $9, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    andl $1, %ecx
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %dl
 ; KNL-NEXT:    kmovw %edx, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $14, %k7, %k7
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kandw %k1, %k6, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k2, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k2
 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL-NEXT:    kshiftrw $12, %k2, %k2
 ; KNL-NEXT:    korw %k2, %k1, %k1
 ; KNL-NEXT:    kandw %k3, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k2
 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL-NEXT:    kshiftrw $11, %k2, %k2
 ; KNL-NEXT:    korw %k2, %k1, %k1
 ; KNL-NEXT:    kandw %k4, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k2
 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL-NEXT:    kshiftrw $10, %k2, %k2
 ; KNL-NEXT:    korw %k2, %k1, %k1
 ; KNL-NEXT:    kandw %k5, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k2
 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL-NEXT:    kshiftrw $9, %k2, %k2
@@ -2743,10 +2743,10 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL_X32:       ## %bb.0:
 ; KNL_X32-NEXT:    pushl %ebx
 ; KNL_X32-NEXT:    subl $16, %esp
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
 ; KNL_X32-NEXT:    kmovw %eax, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_X32-NEXT:    kshiftrw $14, %k1, %k1
@@ -2754,7 +2754,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL_X32-NEXT:    movw $-5, %ax
 ; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k2
 ; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL_X32-NEXT:    kshiftrw $13, %k2, %k2
@@ -2762,7 +2762,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL_X32-NEXT:    movw $-9, %ax
 ; KNL_X32-NEXT:    kmovw %eax, %k2
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k3
 ; KNL_X32-NEXT:    kshiftlw $15, %k3, %k3
 ; KNL_X32-NEXT:    kshiftrw $12, %k3, %k3
@@ -2770,7 +2770,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL_X32-NEXT:    movw $-17, %ax
 ; KNL_X32-NEXT:    kmovw %eax, %k3
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k4
 ; KNL_X32-NEXT:    kshiftlw $15, %k4, %k4
 ; KNL_X32-NEXT:    kshiftrw $11, %k4, %k4
@@ -2778,7 +2778,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL_X32-NEXT:    movw $-33, %ax
 ; KNL_X32-NEXT:    kmovw %eax, %k4
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k5
 ; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL_X32-NEXT:    kshiftrw $10, %k5, %k5
@@ -2786,318 +2786,318 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x
 ; KNL_X32-NEXT:    movw $-65, %ax
 ; KNL_X32-NEXT:    kmovw %eax, %k5
 ; KNL_X32-NEXT:    kandw %k5, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
 ; KNL_X32-NEXT:    kmovw %eax, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $14, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k5, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; KNL_X32-NEXT:    kmovw %ecx, %k0
 ; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    korw %k0, %k6, %k0
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k5, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; KNL_X32-NEXT:    kmovw %ecx, %k0
 ; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    korw %k0, %k6, %k0
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k5, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; KNL_X32-NEXT:    kmovw %ecx, %k0
 ; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    korw %k0, %k6, %k0
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k5, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; KNL_X32-NEXT:    kmovw %ecx, %k0
 ; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    korw %k0, %k6, %k0
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k5, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; KNL_X32-NEXT:    kmovw %ecx, %k0
 ; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    korw %k0, %k6, %k0
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k5, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; KNL_X32-NEXT:    kmovw %ecx, %k0
 ; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL_X32-NEXT:    kmovw %eax, %k7
 ; KNL_X32-NEXT:    korw %k0, %k7, %k0
 ; KNL_X32-NEXT:    kandw %k1, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k7
 ; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL_X32-NEXT:    kshiftrw $13, %k7, %k7
 ; KNL_X32-NEXT:    korw %k7, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k2, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k7
 ; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL_X32-NEXT:    kshiftrw $12, %k7, %k7
 ; KNL_X32-NEXT:    korw %k7, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k3, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k7
 ; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL_X32-NEXT:    kshiftrw $11, %k7, %k7
 ; KNL_X32-NEXT:    korw %k7, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k4, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k7
 ; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL_X32-NEXT:    kshiftrw $10, %k7, %k7
 ; KNL_X32-NEXT:    korw %k7, %k0, %k0
 ; KNL_X32-NEXT:    kandw %k5, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k7
 ; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL_X32-NEXT:    kshiftrw $9, %k7, %k7
 ; KNL_X32-NEXT:    korw %k7, %k0, %k0
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    andl $1, %eax
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; KNL_X32-NEXT:    kmovw %ecx, %k7
 ; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL_X32-NEXT:    kshiftrw $14, %k7, %k7
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    korw %k7, %k6, %k6
 ; KNL_X32-NEXT:    kandw %k1, %k6, %k1
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k6
 ; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL_X32-NEXT:    korw %k6, %k1, %k1
 ; KNL_X32-NEXT:    kandw %k2, %k1, %k1
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k2
 ; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL_X32-NEXT:    kshiftrw $12, %k2, %k2
 ; KNL_X32-NEXT:    korw %k2, %k1, %k1
 ; KNL_X32-NEXT:    kandw %k3, %k1, %k1
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k2
 ; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL_X32-NEXT:    kshiftrw $11, %k2, %k2
 ; KNL_X32-NEXT:    korw %k2, %k1, %k1
 ; KNL_X32-NEXT:    kandw %k4, %k1, %k1
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k2
 ; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL_X32-NEXT:    kshiftrw $10, %k2, %k2
 ; KNL_X32-NEXT:    korw %k2, %k1, %k1
 ; KNL_X32-NEXT:    kandw %k5, %k1, %k1
-; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_X32-NEXT:    kmovw %eax, %k2
 ; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL_X32-NEXT:    kshiftrw $9, %k2, %k2

diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 6fb8ba9f0ea27..1bb3d2747fb49 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1886,10 +1886,10 @@ define void @extload_v8i64(ptr %a, ptr %res) {
 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-LABEL: test21:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    andl $1, %eax
 ; KNL-NEXT:    kmovw %eax, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $14, %k1, %k1
@@ -1899,7 +1899,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k1, %k7
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $13, %k1, %k1
@@ -1908,7 +1908,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $12, %k1, %k1
@@ -1917,7 +1917,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $11, %k1, %k1
@@ -1927,7 +1927,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k1, %k2
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $10, %k1, %k1
@@ -1936,7 +1936,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $9, %k1, %k1
@@ -1946,7 +1946,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k1, %k3
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $8, %k1, %k1
@@ -1955,7 +1955,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $7, %k1, %k1
@@ -1965,7 +1965,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k1, %k4
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $6, %k1, %k1
@@ -1974,7 +1974,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $5, %k1, %k1
@@ -1982,7 +1982,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    movw $-2049, %ax # imm = 0xF7FF
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $4, %k1, %k1
@@ -1991,7 +1991,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $3, %k1, %k1
@@ -2000,7 +2000,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $2, %k1, %k1
@@ -2009,13 +2009,13 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kmovw %eax, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k0, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kshiftlw $1, %k1, %k1
 ; KNL-NEXT:    kshiftrw $1, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
@@ -2050,74 +2050,74 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
 ; KNL-NEXT:    kandw %k2, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k3, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
 ; KNL-NEXT:    kandw %k3, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k4, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $6, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
 ; KNL-NEXT:    kandw %k4, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k5, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $4, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $3, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $2, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kshiftlw $1, %k1, %k1
 ; KNL-NEXT:    kshiftrw $1, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $14, %k1, %k1
@@ -2125,97 +2125,97 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    korw %k1, %k6, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k7, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k0, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
 ; KNL-NEXT:    kandw %k0, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k2, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
 ; KNL-NEXT:    kandw %k0, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k3, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
 ; KNL-NEXT:    kandw %k3, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $6, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k4, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kandw %k5, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $4, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
 ; KNL-NEXT:    kandw %k0, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $3, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
 ; KNL-NEXT:    kandw %k2, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $2, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
 ; KNL-NEXT:    kshiftlw $1, %k1, %k1
 ; KNL-NEXT:    kshiftrw $1, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $14, %k6, %k6
@@ -2223,93 +2223,93 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    korw %k6, %k7, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $13, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $12, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $11, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $10, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $9, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $8, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $7, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kandw %k3, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $6, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kandw %k4, %k6, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
 ; KNL-NEXT:    kandw %k3, %k5, %k4
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL-NEXT:    kshiftrw $4, %k5, %k5
 ; KNL-NEXT:    korw %k5, %k4, %k4
 ; KNL-NEXT:    kandw %k0, %k4, %k3
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k4
 ; KNL-NEXT:    kshiftlw $15, %k4, %k4
 ; KNL-NEXT:    kshiftrw $3, %k4, %k4
 ; KNL-NEXT:    korw %k4, %k3, %k3
 ; KNL-NEXT:    kandw %k2, %k3, %k2
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k3
 ; KNL-NEXT:    kshiftlw $15, %k3, %k3
 ; KNL-NEXT:    kshiftrw $2, %k3, %k3
 ; KNL-NEXT:    korw %k3, %k2, %k2
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
 ; KNL-NEXT:    kandw %k0, %k2, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k2
 ; KNL-NEXT:    kshiftlw $14, %k2, %k2
 ; KNL-NEXT:    korw %k2, %k0, %k0
 ; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k2
 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL-NEXT:    korw %k2, %k0, %k2
@@ -2340,10 +2340,10 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ;
 ; AVX512DQNOBW-LABEL: test21:
 ; AVX512DQNOBW:       # %bb.0:
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    andl $1, %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $14, %k1, %k1
@@ -2353,7 +2353,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw %k1, %k7
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $13, %k1, %k1
@@ -2362,7 +2362,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $12, %k1, %k1
@@ -2371,7 +2371,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $11, %k1, %k1
@@ -2381,7 +2381,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw %k1, %k2
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $10, %k1, %k1
@@ -2390,7 +2390,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $9, %k1, %k1
@@ -2400,7 +2400,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw %k1, %k3
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $8, %k1, %k1
@@ -2409,7 +2409,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $7, %k1, %k1
@@ -2419,7 +2419,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw %k1, %k4
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $6, %k1, %k1
@@ -2428,7 +2428,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $5, %k1, %k1
@@ -2436,7 +2436,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    movw $-2049, %ax # imm = 0xF7FF
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k5
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $4, %k1, %k1
@@ -2445,7 +2445,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $3, %k1, %k1
@@ -2454,7 +2454,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $2, %k1, %k1
@@ -2463,13 +2463,13 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $14, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
@@ -2504,74 +2504,74 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $9, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $8, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $6, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $5, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $4, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $3, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $2, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $14, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    andl $1, %eax
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; AVX512DQNOBW-NEXT:    kmovw %ecx, %k0
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftrw $14, %k0, %k0
@@ -2579,97 +2579,97 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    korw %k0, %k6, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $13, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kandw %k7, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $12, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $11, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $10, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $9, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $8, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $6, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $5, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $4, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $3, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $2, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $14, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    andl $1, %eax
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; AVX512DQNOBW-NEXT:    kmovw %ecx, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $14, %k6, %k6
@@ -2677,93 +2677,93 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    korw %k6, %k7, %k6
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $13, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $12, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $11, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $10, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $9, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $8, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $7, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kandw %k3, %k6, %k6
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $6, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kandw %k4, %k6, %k5
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $5, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k5, %k5
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k3, %k5, %k4
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k5
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k5, %k5
 ; AVX512DQNOBW-NEXT:    kshiftrw $4, %k5, %k5
 ; AVX512DQNOBW-NEXT:    korw %k5, %k4, %k4
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k4, %k3
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k4
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k4, %k4
 ; AVX512DQNOBW-NEXT:    kshiftrw $3, %k4, %k4
 ; AVX512DQNOBW-NEXT:    korw %k4, %k3, %k3
 ; AVX512DQNOBW-NEXT:    kandw %k2, %k3, %k2
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k3
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k3, %k3
 ; AVX512DQNOBW-NEXT:    kshiftrw $2, %k3, %k3
 ; AVX512DQNOBW-NEXT:    korw %k3, %k2, %k2
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k2, %k1
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k2
 ; AVX512DQNOBW-NEXT:    kshiftlw $14, %k2, %k2
 ; AVX512DQNOBW-NEXT:    korw %k2, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
-; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQNOBW-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k2
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512DQNOBW-NEXT:    korw %k2, %k1, %k1

diff  --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
index e43b2f4b4abc4..cc34b3841d3f8 100644
--- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
@@ -696,7 +696,7 @@ define void @load_v2i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
 define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
 ; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movzbl (%rdi), %eax
+; AVX512-NEXT:    movb (%rdi), %al
 ; AVX512-NEXT:    shrb %al
 ; AVX512-NEXT:    xorl %ecx, %ecx
 ; AVX512-NEXT:    testb $1, %al
@@ -711,7 +711,7 @@ define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    movzbl (%rdi), %eax
+; AVX512NOTDQ-NEXT:    movb (%rdi), %al
 ; AVX512NOTDQ-NEXT:    shrb %al
 ; AVX512NOTDQ-NEXT:    xorl %ecx, %ecx
 ; AVX512NOTDQ-NEXT:    testb $1, %al

diff  --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 51e704ba303be..c40ce9ad2d95a 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -308,7 +308,7 @@ define i16 @test15(ptr%addr) {
 define i16 @test16(ptr%addr, i16 %a) {
 ; KNL-LABEL: test16:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    movzbl (%rdi), %eax
+; KNL-NEXT:    movb (%rdi), %al
 ; KNL-NEXT:    kmovw %esi, %k0
 ; KNL-NEXT:    movw $-1025, %cx ## imm = 0xFBFF
 ; KNL-NEXT:    kmovw %ecx, %k1
@@ -344,7 +344,7 @@ define i16 @test16(ptr%addr, i16 %a) {
 define i8 @test17(ptr%addr, i8 %a) {
 ; KNL-LABEL: test17:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    movzbl (%rdi), %eax
+; KNL-NEXT:    movb (%rdi), %al
 ; KNL-NEXT:    kmovw %esi, %k0
 ; KNL-NEXT:    movw $-17, %cx
 ; KNL-NEXT:    kmovw %ecx, %k1
@@ -1429,7 +1429,7 @@ define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
 ; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    andl $15, %edi
-; CHECK-NEXT:    movzbl -24(%rsp,%rdi), %eax
+; CHECK-NEXT:    movb -24(%rsp,%rdi), %al
 ; CHECK-NEXT:    retq
   %t2 = extractelement <16 x i8> %t1, i32 %index
   ret i8 %t2
@@ -1448,7 +1448,7 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
 ; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
 ; CHECK-NEXT:    andl $31, %edi
-; CHECK-NEXT:    movzbl (%rsp,%rdi), %eax
+; CHECK-NEXT:    movb (%rsp,%rdi), %al
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    vzeroupper
@@ -1471,7 +1471,7 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
 ; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
 ; CHECK-NEXT:    andl $63, %edi
-; CHECK-NEXT:    movzbl (%rsp,%rdi), %eax
+; CHECK-NEXT:    movb (%rsp,%rdi), %al
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    vzeroupper
@@ -1495,7 +1495,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
 ; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
 ; CHECK-NEXT:    movzbl %dil, %eax
 ; CHECK-NEXT:    andl $63, %eax
-; CHECK-NEXT:    movzbl (%rsp,%rax), %eax
+; CHECK-NEXT:    movb (%rsp,%rax), %al
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    vzeroupper

diff  --git a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
index a84c32aeead4a..3e9225315228f 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll
@@ -1911,7 +1911,7 @@ entry:
 define <4 x float> @test_mm_mask_fmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fmadd_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2
@@ -1939,7 +1939,7 @@ entry:
 define <4 x float> @test_mm_mask_fmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fmadd_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2
@@ -1961,7 +1961,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4
 define <4 x float> @test_mm_maskz_fmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
@@ -1988,7 +1988,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
@@ -2010,7 +2010,7 @@ declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <
 define <4 x float> @test_mm_mask3_fmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2
@@ -2040,7 +2040,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2
@@ -2064,7 +2064,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <
 define <4 x float> @test_mm_mask_fmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fmsub_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2
@@ -2093,7 +2093,7 @@ entry:
 define <4 x float> @test_mm_mask_fmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fmsub_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2
@@ -2114,7 +2114,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
@@ -2142,7 +2142,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
@@ -2163,7 +2163,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2
@@ -2194,7 +2194,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2
@@ -2218,7 +2218,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <
 define <4 x float> @test_mm_mask_fnmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fnmadd_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
@@ -2247,7 +2247,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fnmadd_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
@@ -2268,7 +2268,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
@@ -2296,7 +2296,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
@@ -2317,7 +2317,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
@@ -2348,7 +2348,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
@@ -2371,7 +2371,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fnmsub_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
@@ -2401,7 +2401,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fnmsub_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
@@ -2423,7 +2423,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
@@ -2452,7 +2452,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
@@ -2474,7 +2474,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
@@ -2506,7 +2506,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_round_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
@@ -2529,7 +2529,7 @@ entry:
 define <2 x double> @test_mm_mask_fmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fmadd_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2
@@ -2557,7 +2557,7 @@ entry:
 define <2 x double> @test_mm_mask_fmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fmadd_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2
@@ -2579,7 +2579,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>,
 define <2 x double> @test_mm_maskz_fmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
@@ -2606,7 +2606,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
@@ -2628,7 +2628,7 @@ declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>
 define <2 x double> @test_mm_mask3_fmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2
@@ -2658,7 +2658,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2
@@ -2682,7 +2682,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>
 define <2 x double> @test_mm_mask_fmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fmsub_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2
@@ -2711,7 +2711,7 @@ entry:
 define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fmsub_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2
@@ -2732,7 +2732,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
@@ -2760,7 +2760,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
@@ -2781,7 +2781,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2
@@ -2812,7 +2812,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2
@@ -2836,7 +2836,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>
 define <2 x double> @test_mm_mask_fnmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fnmadd_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
@@ -2865,7 +2865,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fnmadd_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
@@ -2886,7 +2886,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
@@ -2914,7 +2914,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
@@ -2935,7 +2935,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
@@ -2966,7 +2966,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
@@ -2989,7 +2989,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fnmsub_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
@@ -3019,7 +3019,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fnmsub_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
@@ -3041,7 +3041,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
@@ -3070,7 +3070,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
@@ -3092,7 +3092,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
@@ -3124,7 +3124,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_round_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
 ; X86-NEXT:    ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
@@ -3147,7 +3147,7 @@ entry:
 define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_add_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vaddss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x58,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3172,7 +3172,7 @@ entry:
 define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_add_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vaddss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x58,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3196,7 +3196,7 @@ entry:
 define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_add_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vaddsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x58,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3221,7 +3221,7 @@ entry:
 define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_add_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x58,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3245,7 +3245,7 @@ entry:
 define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_sub_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vsubss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5c,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3270,7 +3270,7 @@ entry:
 define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_sub_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vsubss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5c,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3294,7 +3294,7 @@ entry:
 define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_sub_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vsubsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5c,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3319,7 +3319,7 @@ entry:
 define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_sub_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5c,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3343,7 +3343,7 @@ entry:
 define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_mul_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmulss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x59,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3368,7 +3368,7 @@ entry:
 define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_mul_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmulss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x59,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3392,7 +3392,7 @@ entry:
 define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_mul_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmulsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x59,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3417,7 +3417,7 @@ entry:
 define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_mul_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x59,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3441,7 +3441,7 @@ entry:
 define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_div_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vdivss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5e,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3466,7 +3466,7 @@ entry:
 define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_div_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vdivss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5e,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3490,7 +3490,7 @@ entry:
 define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_div_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vdivsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5e,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3515,7 +3515,7 @@ entry:
 define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_div_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5e,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]

diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index fc8f812bc1488..094aca69bfed1 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -219,7 +219,7 @@ entry:
 define <8 x double> @test_mm512_mask_shuffle_f64x2(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_mask_shuffle_f64x2:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
 ; X86-NEXT:    retl
@@ -239,7 +239,7 @@ entry:
 define <8 x double> @test_mm512_maskz_shuffle_f64x2(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_maskz_shuffle_f64x2:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
 ; X86-NEXT:    retl
@@ -324,7 +324,7 @@ entry:
 define <8 x i64> @test_mm512_mask_shuffle_i64x2(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm512_mask_shuffle_i64x2:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
 ; X86-NEXT:    retl
@@ -344,7 +344,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_shuffle_i64x2(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm512_maskz_shuffle_i64x2:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
 ; X86-NEXT:    retl
@@ -425,7 +425,7 @@ entry:
 define zeroext i8 @test_mm512_mask_testn_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_testn_epi64_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestnmq %zmm0, %zmm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -482,7 +482,7 @@ entry:
 define zeroext i8 @test_mm512_mask_test_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_test_epi64_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestmq %zmm0, %zmm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -557,7 +557,7 @@ entry:
 define <8 x i64> @test_mm512_mask_set1_epi64(<8 x i64> %__O, i8 zeroext %__M, i64 %__A) {
 ; X86-LABEL: test_mm512_mask_set1_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
 ; X86-NEXT:    kmovw %eax, %k1
@@ -580,7 +580,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_set1_epi64(i8 zeroext %__M, i64 %__A)  {
 ; X86-LABEL: test_mm512_maskz_set1_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; X86-NEXT:    kmovw %eax, %k1
@@ -667,7 +667,7 @@ define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) {
 define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i64> %a2) {
 ; X86-LABEL: test_mm512_mask_broadcastq_epi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -686,7 +686,7 @@ define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i
 define <8 x i64> @test_mm512_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
 ; X86-LABEL: test_mm512_maskz_broadcastq_epi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -714,7 +714,7 @@ define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a0) {
 define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 x double> %a2) {
 ; X86-LABEL: test_mm512_mask_broadcastsd_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastsd %xmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -733,7 +733,7 @@ define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2
 define <8 x double> @test_mm512_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
 ; X86-LABEL: test_mm512_maskz_broadcastsd_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -808,7 +808,7 @@ define <8 x double> @test_mm512_movedup_pd(<8 x double> %a0) {
 define <8 x double> @test_mm512_mask_movedup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
 ; X86-LABEL: test_mm512_mask_movedup_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
 ; X86-NEXT:    retl
@@ -827,7 +827,7 @@ define <8 x double> @test_mm512_mask_movedup_pd(<8 x double> %a0, i8 %a1, <8 x d
 define <8 x double> @test_mm512_maskz_movedup_pd(i8 %a0, <8 x double> %a1) {
 ; X86-LABEL: test_mm512_maskz_movedup_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
 ; X86-NEXT:    retl
@@ -949,7 +949,7 @@ define <8 x double> @test_mm512_permute_pd(<8 x double> %a0) {
 define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
 ; X86-LABEL: test_mm512_mask_permute_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
 ; X86-NEXT:    retl
@@ -968,7 +968,7 @@ define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x d
 define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) {
 ; X86-LABEL: test_mm512_maskz_permute_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
 ; X86-NEXT:    retl
@@ -1043,7 +1043,7 @@ define <8 x i64> @test_mm512_permutex_epi64(<8 x i64> %a0) {
 define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) {
 ; X86-LABEL: test_mm512_mask_permutex_epi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
 ; X86-NEXT:    retl
@@ -1062,7 +1062,7 @@ define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64
 define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) {
 ; X86-LABEL: test_mm512_maskz_permutex_epi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
 ; X86-NEXT:    retl
@@ -1090,7 +1090,7 @@ define <8 x double> @test_mm512_permutex_pd(<8 x double> %a0) {
 define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
 ; X86-LABEL: test_mm512_mask_permutex_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
 ; X86-NEXT:    retl
@@ -1109,7 +1109,7 @@ define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x
 define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
 ; X86-LABEL: test_mm512_maskz_permutex_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
 ; X86-NEXT:    retl
@@ -1191,7 +1191,7 @@ define <8 x double> @test_mm512_shuffle_pd(<8 x double> %a0, <8 x double> %a1) {
 define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
 ; X86-LABEL: test_mm512_mask_shuffle_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
 ; X86-NEXT:    retl
@@ -1210,7 +1210,7 @@ define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x d
 define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
 ; X86-LABEL: test_mm512_maskz_shuffle_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; X86-NEXT:    retl
@@ -1295,7 +1295,7 @@ define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
 define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
 ; X86-LABEL: test_mm512_mask_unpackhi_epi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
 ; X86-NEXT:    retl
@@ -1314,7 +1314,7 @@ define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64
 define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
 ; X86-LABEL: test_mm512_maskz_unpackhi_epi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpunpckhqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
 ; X86-NEXT:    retl
@@ -1342,7 +1342,7 @@ define <8 x double> @test_mm512_unpackhi_pd(<8 x double> %a0, <8 x double> %a1)
 define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
 ; X86-LABEL: test_mm512_mask_unpackhi_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
 ; X86-NEXT:    retl
@@ -1361,7 +1361,7 @@ define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x
 define <8 x double> @test_mm512_maskz_unpackhi_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
 ; X86-LABEL: test_mm512_maskz_unpackhi_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
 ; X86-NEXT:    retl
@@ -1493,7 +1493,7 @@ define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) {
 define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
 ; X86-LABEL: test_mm512_mask_unpacklo_epi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpunpcklqdq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
 ; X86-NEXT:    retl
@@ -1512,7 +1512,7 @@ define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64
 define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
 ; X86-LABEL: test_mm512_maskz_unpacklo_epi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; X86-NEXT:    retl
@@ -1540,7 +1540,7 @@ define <8 x double> @test_mm512_unpacklo_pd(<8 x double> %a0, <8 x double> %a1)
 define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
 ; X86-LABEL: test_mm512_mask_unpacklo_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
 ; X86-NEXT:    retl
@@ -1559,7 +1559,7 @@ define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x
 define <8 x double> @test_mm512_maskz_unpacklo_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
 ; X86-LABEL: test_mm512_maskz_unpacklo_pd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; X86-NEXT:    retl
@@ -1692,7 +1692,7 @@ define <8 x i64> @test_mm512_mul_epi32(<8 x i64> %__A, <8 x i64> %__B) nounwind
 define <8 x i64> @test_mm512_maskz_mul_epi32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B) nounwind {
 ; X86-LABEL: test_mm512_maskz_mul_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuldq %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1716,7 +1716,7 @@ entry:
 define <8 x i64> @test_mm512_mask_mul_epi32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__src) nounwind {
 ; X86-LABEL: test_mm512_mask_mul_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuldq %zmm0, %zmm1, %zmm2 {%k1}
 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0
@@ -1753,7 +1753,7 @@ define <8 x i64> @test_mm512_mul_epu32(<8 x i64> %__A, <8 x i64> %__B) nounwind
 define <8 x i64> @test_mm512_maskz_mul_epu32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B) nounwind {
 ; X86-LABEL: test_mm512_maskz_mul_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1775,7 +1775,7 @@ entry:
 define <8 x i64> @test_mm512_mask_mul_epu32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__src) nounwind {
 ; X86-LABEL: test_mm512_mask_mul_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2 {%k1}
 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0
@@ -1799,7 +1799,7 @@ entry:
 define <8 x double> @test_mm512_set1_epi8(i8 signext %d) nounwind {
 ; X86-LABEL: test_mm512_set1_epi8:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    vmovd %eax, %xmm0
 ; X86-NEXT:    vpbroadcastb %xmm0, %ymm0
 ; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
@@ -1987,7 +1987,7 @@ entry:
 define <8 x double> @test_mm512_mask_cvtps_pd(<8 x double> %__W, i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm512_mask_cvtps_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2pd %ymm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2007,7 +2007,7 @@ entry:
 define <8 x double> @test_mm512_mask_cvtpslo_pd(<8 x double> %__W, i8 zeroext %__U, <16 x float> %__A) {
 ; X86-LABEL: test_mm512_mask_cvtpslo_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2pd %ymm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2028,7 +2028,7 @@ entry:
 define <8 x double> @test_mm512_maskz_cvtps_pd(i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm512_maskz_cvtps_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2pd %ymm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2117,7 +2117,7 @@ entry:
 define <4 x i64> @test_mm512_mask_cvtepi64_epi32(<4 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_cvtepi64_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovqd %zmm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -2139,7 +2139,7 @@ entry:
 define <4 x i64> @test_mm512_maskz_cvtepi64_epi32(i8 zeroext %__M, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_maskz_cvtepi64_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovqd %zmm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2172,7 +2172,7 @@ entry:
 define <2 x i64> @test_mm512_mask_cvtepi64_epi16(<2 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_cvtepi64_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovqw %zmm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
@@ -2194,7 +2194,7 @@ entry:
 define <2 x i64> @test_mm512_maskz_cvtepi64_epi16(i8 zeroext %__M, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_maskz_cvtepi64_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovqw %zmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
@@ -2294,7 +2294,7 @@ declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64
 define <8 x i64> @test_mm512_mask_ternarylogic_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
 ; X86-LABEL: test_mm512_mask_ternarylogic_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2314,7 +2314,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_ternarylogic_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
 ; X86-LABEL: test_mm512_maskz_ternarylogic_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2364,7 +2364,7 @@ declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>,
 define <8 x double> @test_mm512_mask2_permutex2var_pd(<8 x double> %__A, <8 x i64> %__I, i8 zeroext %__U, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_mask2_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vmovapd %zmm1, %zmm0
@@ -2415,7 +2415,7 @@ declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i
 define <8 x i64> @test_mm512_mask2_permutex2var_epi64(<8 x i64> %__A, <8 x i64> %__I, i8 zeroext %__U, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask2_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0
@@ -2509,7 +2509,7 @@ entry:
 define <8 x double> @test_mm512_mask_permutex2var_pd(<8 x double> %__A, i8 zeroext %__U, <8 x i64> %__I, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_mask_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2529,7 +2529,7 @@ entry:
 define <8 x double> @test_mm512_maskz_permutex2var_pd(i8 zeroext %__U, <8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_maskz_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2612,7 +2612,7 @@ entry:
 define <8 x i64> @test_mm512_mask_permutex2var_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__I, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2q %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2632,7 +2632,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_permutex2var_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2q %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2651,7 +2651,7 @@ entry:
 define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_add_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vaddss %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2676,7 +2676,7 @@ entry:
 define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_add_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vaddss %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2700,7 +2700,7 @@ entry:
 define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_add_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vaddsd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2725,7 +2725,7 @@ entry:
 define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_add_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2749,7 +2749,7 @@ entry:
 define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_sub_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsubss %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2774,7 +2774,7 @@ entry:
 define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_sub_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsubss %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2798,7 +2798,7 @@ entry:
 define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_sub_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsubsd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2823,7 +2823,7 @@ entry:
 define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_sub_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2847,7 +2847,7 @@ entry:
 define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_mul_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmulss %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2872,7 +2872,7 @@ entry:
 define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_mul_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmulss %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2896,7 +2896,7 @@ entry:
 define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_mul_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmulsd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2921,7 +2921,7 @@ entry:
 define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_mul_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2945,7 +2945,7 @@ entry:
 define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_div_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vdivss %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2970,7 +2970,7 @@ entry:
 define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_div_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vdivss %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2994,7 +2994,7 @@ entry:
 define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_div_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vdivsd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -3019,7 +3019,7 @@ entry:
 define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_div_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3056,7 +3056,7 @@ declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double>, <8 x double>,
 define <8 x double> @test_mm512_mask_fmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fmadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -3076,7 +3076,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -3098,7 +3098,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3136,7 +3136,7 @@ entry:
 define <8 x double> @test_mm512_mask_fmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fmsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -3157,7 +3157,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3196,7 +3196,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fnmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fnmadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -3219,7 +3219,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fnmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fnmadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3255,7 +3255,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fnmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fnmsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3287,7 +3287,7 @@ entry:
 define <8 x double> @test_mm512_mask_fmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
 ; X86-NEXT:    retl
@@ -3307,7 +3307,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) + zmm2
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -3329,7 +3329,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
 ; X86-NEXT:    retl
@@ -3367,7 +3367,7 @@ entry:
 define <8 x double> @test_mm512_mask_fmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) - zmm2
 ; X86-NEXT:    retl
@@ -3388,7 +3388,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) - zmm2
 ; X86-NEXT:    retl
@@ -3427,7 +3427,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fnmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231pd {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) + zmm2
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -3450,7 +3450,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fnmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213pd {{.*#+}} zmm0 {%k1} {z} = -(zmm1 * zmm0) + zmm2
 ; X86-NEXT:    retl
@@ -3486,7 +3486,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fnmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213pd {{.*#+}} zmm0 {%k1} {z} = -(zmm1 * zmm0) - zmm2
 ; X86-NEXT:    retl
@@ -3984,7 +3984,7 @@ declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double
 define <8 x double> @test_mm512_mask_fmaddsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fmaddsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -4004,7 +4004,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fmaddsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmaddsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -4026,7 +4026,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fmaddsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmaddsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4064,7 +4064,7 @@ entry:
 define <8 x double> @test_mm512_mask_fmsubadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fmsubadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -4085,7 +4085,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fmsubadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmsubadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4119,7 +4119,7 @@ entry:
 define <8 x double> @test_mm512_mask_fmaddsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
 ; X86-NEXT:    retl
@@ -4142,7 +4142,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -4167,7 +4167,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fmaddsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
 ; X86-NEXT:    retl
@@ -4203,7 +4203,7 @@ entry:
 define <8 x double> @test_mm512_mask_fmsubadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
 ; X86-NEXT:    retl
@@ -4226,7 +4226,7 @@ entry:
 define <8 x double> @test_mm512_maskz_fmsubadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) -/+ zmm2
 ; X86-NEXT:    retl
@@ -4526,7 +4526,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -4549,7 +4549,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) - zmm2
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -4618,7 +4618,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fmsubadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmsubadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -4641,7 +4641,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -4714,7 +4714,7 @@ entry:
 define <8 x double> @test_mm512_mask_fnmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fnmadd_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -4735,7 +4735,7 @@ entry:
 define <8 x double> @test_mm512_mask_fnmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
 ; X86-NEXT:    retl
@@ -4798,7 +4798,7 @@ entry:
 define <8 x double> @test_mm512_mask_fnmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fnmsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -4820,7 +4820,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fnmsub_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -4844,7 +4844,7 @@ entry:
 define <8 x double> @test_mm512_mask_fnmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
 ; X86-LABEL: test_mm512_mask_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
 ; X86-NEXT:    retl
@@ -4866,7 +4866,7 @@ entry:
 define <8 x double> @test_mm512_mask3_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231pd {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
 ; X86-NEXT:    vmovapd %zmm2, %zmm0
@@ -4982,7 +4982,7 @@ entry:
 define <4 x float> @test_mm_mask_fmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fmadd_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -5008,7 +5008,7 @@ entry:
 define <4 x float> @test_mm_mask_fmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fmadd_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -5035,7 +5035,7 @@ declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32) #1
 define <4 x float> @test_mm_maskz_fmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213ss {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -5060,7 +5060,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -5085,7 +5085,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231ss {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5113,7 +5113,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5140,7 +5140,7 @@ entry:
 define <4 x float> @test_mm_mask_fmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fmsub_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -5167,7 +5167,7 @@ entry:
 define <4 x float> @test_mm_mask_fmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fmsub_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -5193,7 +5193,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213ss {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -5219,7 +5219,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -5245,7 +5245,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231ss {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5274,7 +5274,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5302,7 +5302,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fnmadd_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -5329,7 +5329,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fnmadd_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -5355,7 +5355,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213ss {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -5381,7 +5381,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -5407,7 +5407,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5436,7 +5436,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5464,7 +5464,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fnmsub_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -5492,7 +5492,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_fnmsub_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -5519,7 +5519,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213ss {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -5546,7 +5546,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -5573,7 +5573,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5603,7 +5603,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_round_ss:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5632,7 +5632,7 @@ entry:
 define <2 x double> @test_mm_mask_fmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fmadd_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213sd {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -5658,7 +5658,7 @@ entry:
 define <2 x double> @test_mm_mask_fmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fmadd_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -5685,7 +5685,7 @@ declare double @llvm.x86.avx512.vfmadd.f64(double, double, double, i32) #1
 define <2 x double> @test_mm_maskz_fmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213sd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -5710,7 +5710,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -5735,7 +5735,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231sd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -5763,7 +5763,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -5790,7 +5790,7 @@ entry:
 define <2 x double> @test_mm_mask_fmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fmsub_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213sd {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -5817,7 +5817,7 @@ entry:
 define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fmsub_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -5843,7 +5843,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213sd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -5869,7 +5869,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -5895,7 +5895,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231sd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -5924,7 +5924,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -5952,7 +5952,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fnmadd_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213sd {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -5979,7 +5979,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fnmadd_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6005,7 +6005,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213sd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -6031,7 +6031,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6057,7 +6057,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231sd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -6086,7 +6086,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -6114,7 +6114,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fnmsub_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213sd {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -6142,7 +6142,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_fnmsub_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6169,7 +6169,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213sd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -6196,7 +6196,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6223,7 +6223,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231sd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -6253,7 +6253,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_round_sd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -6283,7 +6283,7 @@ define <8 x i64> @test_mm512_mask_expandloadu_epi64(<8 x i64> %__W, i8 zeroext %
 ; X86-LABEL: test_mm512_mask_expandloadu_epi64:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6303,7 +6303,7 @@ define <8 x i64> @test_mm512_maskz_expandloadu_epi64(i8 zeroext %__U, ptr readon
 ; X86-LABEL: test_mm512_maskz_expandloadu_epi64:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6323,7 +6323,7 @@ define <8 x double> @test_mm512_mask_expandloadu_pd(<8 x double> %__W, i8 zeroex
 ; X86-LABEL: test_mm512_mask_expandloadu_pd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6343,7 +6343,7 @@ define <8 x double> @test_mm512_maskz_expandloadu_pd(i8 zeroext %__U, ptr readon
 ; X86-LABEL: test_mm512_maskz_expandloadu_pd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6445,7 +6445,7 @@ entry:
 define void @test_mm512_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <8 x double> %__A) {
 ; X86-LABEL: test_mm512_mask_compressstoreu_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcompresspd %zmm0, (%ecx) {%k1}
@@ -6467,7 +6467,7 @@ entry:
 define void @test_mm512_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_compressstoreu_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpcompressq %zmm0, (%ecx) {%k1}
@@ -6728,7 +6728,7 @@ entry:
 define i64 @test_mm512_mask_reduce_add_epi64(i8 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_add_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
@@ -6773,7 +6773,7 @@ entry:
 define i64 @test_mm512_mask_reduce_mul_epi64(i8 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_mul_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0]
 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
@@ -6862,7 +6862,7 @@ entry:
 define i64 @test_mm512_mask_reduce_and_epi64(i8 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_and_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
@@ -6909,7 +6909,7 @@ entry:
 define i64 @test_mm512_mask_reduce_or_epi64(i8 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_or_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
@@ -7507,7 +7507,7 @@ define double @test_mm512_mask_reduce_add_pd(i8 zeroext %__M, <8 x double> %__W)
 ; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movzbl 8(%ebp), %eax
+; X86-NEXT:    movb 8(%ebp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
@@ -7561,7 +7561,7 @@ define double @test_mm512_mask_reduce_mul_pd(i8 zeroext %__M, <8 x double> %__W)
 ; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movzbl 8(%ebp), %eax
+; X86-NEXT:    movb 8(%ebp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastsd {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; X86-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
@@ -7976,7 +7976,7 @@ entry:
 define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_max_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648]
 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
@@ -8024,7 +8024,7 @@ entry:
 define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_max_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,2,3]
@@ -8077,7 +8077,7 @@ define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W)
 ; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movzbl 8(%ebp), %eax
+; X86-NEXT:    movb 8(%ebp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastsd {{.*#+}} zmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf]
 ; X86-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
@@ -8126,7 +8126,7 @@ entry:
 define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_min_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647]
 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
@@ -8174,7 +8174,7 @@ entry:
 define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_min_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
@@ -8229,7 +8229,7 @@ define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W)
 ; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movzbl 8(%ebp), %eax
+; X86-NEXT:    movb 8(%ebp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastsd {{.*#+}} zmm1 = [+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf]
 ; X86-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
@@ -8898,7 +8898,7 @@ entry:
 define <8 x double> @test_mm512_mask_max_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_mask_max_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -8918,7 +8918,7 @@ entry:
 define <8 x double> @test_mm512_maskz_max_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_maskz_max_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -8958,7 +8958,7 @@ entry:
 define <8 x double> @test_mm512_mask_max_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_mask_max_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -8980,7 +8980,7 @@ declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32
 define <8 x double> @test_mm512_maskz_max_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_maskz_max_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -9082,7 +9082,7 @@ entry:
 define <8 x double> @test_mm512_mask_min_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_mask_min_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -9102,7 +9102,7 @@ entry:
 define <8 x double> @test_mm512_maskz_min_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_maskz_min_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vminpd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -9122,7 +9122,7 @@ entry:
 define <8 x double> @test_mm512_mask_min_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_mask_min_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -9144,7 +9144,7 @@ declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32
 define <8 x double> @test_mm512_maskz_min_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; X86-LABEL: test_mm512_maskz_min_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vminpd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -9276,7 +9276,7 @@ entry:
 define <8 x double> @test_mm512_mask_sqrt_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A) {
 ; X86-LABEL: test_mm512_mask_sqrt_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtpd %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -9296,7 +9296,7 @@ entry:
 define <8 x double> @test_mm512_maskz_sqrt_pd(i8 zeroext %__U, <8 x double> %__A) {
 ; X86-LABEL: test_mm512_maskz_sqrt_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtpd %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -9316,7 +9316,7 @@ entry:
 define <8 x double> @test_mm512_mask_sqrt_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A) {
 ; X86-LABEL: test_mm512_mask_sqrt_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtpd {rn-sae}, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -9338,7 +9338,7 @@ declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32)
 define <8 x double> @test_mm512_maskz_sqrt_round_pd(i8 zeroext %__U, <8 x double> %__A) {
 ; X86-LABEL: test_mm512_maskz_sqrt_round_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtpd {rn-sae}, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -9537,7 +9537,7 @@ entry:
 define <8 x i64> @test_mm512_mask_rol_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_rol_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolq $5, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -9557,7 +9557,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_rol_epi64(i8 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_maskz_rol_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolq $5, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -9647,7 +9647,7 @@ entry:
 define <8 x i64> @test_mm512_mask_rolv_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_rolv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvq %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -9667,7 +9667,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_rolv_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_rolv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -9755,7 +9755,7 @@ entry:
 define <8 x i64> @test_mm512_mask_ror_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_ror_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorq $5, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -9775,7 +9775,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_ror_epi64(i8 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_maskz_ror_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorq $5, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -9865,7 +9865,7 @@ entry:
 define <8 x i64> @test_mm512_mask_rorv_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_rorv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvq %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -9885,7 +9885,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_rorv_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_rorv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl

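A note on the pattern being reverted (illustrative sketch, not part of the patch): every hunk above reverts the same one-line codegen difference. On i686, an i8 mask argument is loaded from the stack and transferred to a mask register, and the reverted commit had widened that byte load from movb to movzbl. A minimal standalone sketch of the pattern (hypothetical function name, not taken from the tests), suitable for llc with -mtriple=i686-- -mattr=+avx512f:

  ; Hypothetical reproducer: an i8 mask parameter becomes a k-register
  ; predicate, so llc must first load the byte from the stack and then
  ; kmovw it into %k1. This revert restores "movb ..., %al" for that load.
  define <8 x double> @mask_select_demo(i8 zeroext %mask, <8 x double> %a, <8 x double> %b) {
  entry:
    %m = bitcast i8 %mask to <8 x i1>
    %r = select <8 x i1> %m, <8 x double> %a, <8 x double> %b
    ret <8 x double> %r
  }

Only the low 8 mask bits are consumed by these 8-lane operations, so both load forms yield the same k-register contents; the presumed motivation for the zero-extending form was avoiding a partial-register write of %al.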
diff  --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index b0be8c55711b9..cd48815a5cfc0 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -3330,7 +3330,7 @@ define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %
 define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_move_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3349,7 +3349,7 @@ entry:
 define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_move_ss:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3367,7 +3367,7 @@ entry:
 define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_move_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -3385,7 +3385,7 @@ entry:
 define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_move_sd:
 ; X86:       ## %bb.0: ## %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
 ; X86-NEXT:    retl ## encoding: [0xc3]
@@ -6771,7 +6771,7 @@ define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    andb %cl, %al ## encoding: [0x20,0xc8]
 ; X86-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
@@ -6857,7 +6857,7 @@ define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2
 ; X86:       ## %bb.0:
 ; X86-NEXT:    vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    andb %cl, %al ## encoding: [0x20,0xc8]
 ; X86-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
@@ -9904,7 +9904,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>,
 define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xda]
@@ -9941,7 +9941,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4
 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xda]
@@ -9978,7 +9978,7 @@ declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>
 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
@@ -10007,7 +10007,7 @@ declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <
 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
 ; X86-NEXT:    ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
@@ -10029,7 +10029,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>
 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
 ; X86-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd9]
@@ -10066,7 +10066,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <
 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
 ; X86-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd9]
@@ -10101,7 +10101,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x flo
 define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) {
 ; X86-LABEL: fmadd_ss_mask_memfold:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
 ; X86-NEXT:    vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
@@ -10149,7 +10149,7 @@ define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) {
 define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) {
 ; X86-LABEL: fmadd_ss_maskz_memfold:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
 ; X86-NEXT:    vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
@@ -10193,7 +10193,7 @@ define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) {
 define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) {
 ; X86-LABEL: fmadd_sd_mask_memfold:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
 ; X86-NEXT:    vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
@@ -10237,7 +10237,7 @@ define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) {
 define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) {
 ; X86-LABEL: fmadd_sd_maskz_memfold:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
 ; X86-NEXT:    vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
@@ -10279,7 +10279,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>
 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
 ; X86-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd9]
@@ -10316,7 +10316,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <
 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
 ; X86-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd9]
@@ -10353,7 +10353,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double
 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
 ; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd9]
@@ -10390,7 +10390,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>,
 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
 ; X86-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd9]
@@ -10426,7 +10426,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
 ; X86-NEXT:    vfmadd231ss (%eax), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x08]
 ; X86-NEXT:    ## xmm1 {%k1} = (xmm0 * mem) + xmm1
@@ -10450,7 +10450,7 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x f
 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
 ; X86-NEXT:    vfmadd132ss (%eax), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x00]
 ; X86-NEXT:    ## xmm0 {%k1} = (xmm0 * mem) + xmm1

diff  --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 3339549e5c426..3a19945eb5f17 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -1099,7 +1099,7 @@ define void @test_mask_store_ss(ptr %ptr, <4 x float> %data, i8 %mask) {
 ; X86-LABEL: test_mask_store_ss:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vmovss %xmm0, (%eax) {%k1}
 ; X86-NEXT:    retl
@@ -5735,7 +5735,7 @@ define <2 x double> @test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x d
 ;
 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovapd %xmm0, %xmm3
 ; X86-NEXT:    vfmadd213sd {{.*#+}} xmm3 {%k1} = (xmm1 * xmm3) + xmm2
@@ -5786,7 +5786,7 @@ define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x flo
 ;
 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovaps %xmm0, %xmm3
 ; X86-NEXT:    vfmadd213ss {{.*#+}} xmm3 {%k1} = (xmm1 * xmm3) + xmm2
@@ -5834,7 +5834,7 @@ define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x d
 ;
 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovapd %xmm0, %xmm3
 ; X86-NEXT:    vfmadd213sd {{.*#+}} xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2
@@ -5876,7 +5876,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x flo
 ;
 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovaps %xmm0, %xmm3
 ; X86-NEXT:    vfmadd213ss {{.*#+}} xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2
@@ -5916,7 +5916,7 @@ define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_load0(i8 zeroext %0, ptr
 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_load0:
 ; X86:       # %bb.0:
 ; X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    vmovaps (%ecx), %xmm0
 ; X86-NEXT:    kmovw %eax, %k1
@@ -5947,7 +5947,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x
 ;
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovapd %xmm2, %xmm3
 ; X86-NEXT:    vfmadd231sd {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) + xmm3
@@ -5998,7 +5998,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x fl
 ;
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovaps %xmm2, %xmm3
 ; X86-NEXT:    vfmadd231ss {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) + xmm3
@@ -6047,7 +6047,7 @@ define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) {
 ;
 ; X86-LABEL: fmadd_ss_mask_memfold:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -6093,7 +6093,7 @@ define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) {
 ;
 ; X86-LABEL: fmadd_ss_maskz_memfold:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -6139,7 +6139,7 @@ define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) {
 ;
 ; X86-LABEL: fmadd_sd_mask_memfold:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
@@ -6181,7 +6181,7 @@ define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) {
 ;
 ; X86-LABEL: fmadd_sd_maskz_memfold:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
@@ -6225,7 +6225,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x
 ;
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovapd %xmm2, %xmm3
 ; X86-NEXT:    vfmsub231sd {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) - xmm3
@@ -6282,7 +6282,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x fl
 ;
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovaps %xmm2, %xmm3
 ; X86-NEXT:    vfmsub231ss {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) - xmm3
@@ -6339,7 +6339,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x
 ;
 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovapd %xmm2, %xmm3
 ; X86-NEXT:    vfnmsub231sd {{.*#+}} xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
@@ -6399,7 +6399,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x f
 ;
 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovaps %xmm2, %xmm3
 ; X86-NEXT:    vfnmsub231ss {{.*#+}} xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
@@ -6455,7 +6455,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x
 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vfmadd231ss {{.*#+}} xmm1 {%k1} = (xmm0 * mem) + xmm1
 ; X86-NEXT:    vmovaps %xmm1, %xmm0
@@ -6483,7 +6483,7 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x f
 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vfmadd132ss {{.*#+}} xmm0 {%k1} = (xmm0 * mem) + xmm1
 ; X86-NEXT:    retl

diff  --git a/llvm/test/CodeGen/X86/avx512-load-store.ll b/llvm/test/CodeGen/X86/avx512-load-store.ll
index c32c3d9b85503..d294855a909d6 100644
--- a/llvm/test/CodeGen/X86/avx512-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-load-store.ll
@@ -13,7 +13,7 @@ define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x
 ;
 ; CHECK32-LABEL: test_mm_mask_move_ss:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1}
 ; CHECK32-NEXT:    retl
@@ -36,7 +36,7 @@ define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4
 ;
 ; CHECK32-LABEL: test_mm_maskz_move_ss:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; CHECK32-NEXT:    retl
@@ -58,7 +58,7 @@ define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2
 ;
 ; CHECK32-LABEL: test_mm_mask_move_sd:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1}
 ; CHECK32-NEXT:    retl
@@ -81,7 +81,7 @@ define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <
 ;
 ; CHECK32-LABEL: test_mm_maskz_move_sd:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; CHECK32-NEXT:    retl
@@ -127,7 +127,7 @@ define void @test_mm_mask_store_sd(ptr %__W, i8 zeroext %__U, <2 x double> %__A)
 ; CHECK32-LABEL: test_mm_mask_store_sd:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovsd %xmm0, (%eax) {%k1}
 ; CHECK32-NEXT:    retl
@@ -174,7 +174,7 @@ define <2 x double> @test_mm_mask_load_sd(<2 x double> %__A, i8 zeroext %__U, pt
 ; CHECK32-LABEL: test_mm_mask_load_sd:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1}
 ; CHECK32-NEXT:    retl
@@ -221,7 +221,7 @@ define <2 x double> @test_mm_maskz_load_sd(i8 zeroext %__U, ptr %__W) local_unna
 ; CHECK32-LABEL: test_mm_maskz_load_sd:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1} {z}
 ; CHECK32-NEXT:    retl
@@ -245,7 +245,7 @@ define void @test_mm_mask_store_ss_2(ptr %__P, i8 zeroext %__U, <4 x float> %__A
 ; CHECK32-LABEL: test_mm_mask_store_ss_2:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovss %xmm0, (%eax) {%k1}
 ; CHECK32-NEXT:    retl
@@ -267,7 +267,7 @@ define void @test_mm_mask_store_sd_2(ptr %__P, i8 zeroext %__U, <2 x double> %__
 ; CHECK32-LABEL: test_mm_mask_store_sd_2:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovsd %xmm0, (%eax) {%k1}
 ; CHECK32-NEXT:    retl
@@ -289,7 +289,7 @@ define <4 x float> @test_mm_mask_load_ss_2(<4 x float> %__A, i8 zeroext %__U, pt
 ; CHECK32-LABEL: test_mm_mask_load_ss_2:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1}
 ; CHECK32-NEXT:    retl
@@ -312,7 +312,7 @@ define <4 x float> @test_mm_maskz_load_ss_2(i8 zeroext %__U, ptr readonly %__W)
 ; CHECK32-LABEL: test_mm_maskz_load_ss_2:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1} {z}
 ; CHECK32-NEXT:    retl
@@ -334,7 +334,7 @@ define <2 x double> @test_mm_mask_load_sd_2(<2 x double> %__A, i8 zeroext %__U,
 ; CHECK32-LABEL: test_mm_mask_load_sd_2:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1}
 ; CHECK32-NEXT:    retl
@@ -357,7 +357,7 @@ define <2 x double> @test_mm_maskz_load_sd_2(i8 zeroext %__U, ptr readonly %__W)
 ; CHECK32-LABEL: test_mm_maskz_load_sd_2:
 ; CHECK32:       # %bb.0: # %entry
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    kmovw %ecx, %k1
 ; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1} {z}
 ; CHECK32-NEXT:    retl

diff  --git a/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll b/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll
index 38179d9fcf68d..38fa44b346729 100644
--- a/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll
+++ b/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll
@@ -13,7 +13,7 @@ define void @load_v1i2_trunc_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512-ONLY-LABEL: load_v1i2_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    movzbl (%rdi), %eax
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    andl $1, %eax
 ; AVX512-ONLY-NEXT:    kmovw %eax, %k0
 ; AVX512-ONLY-NEXT:    kmovw %k0, %eax
@@ -35,7 +35,7 @@ define void @load_v1i3_trunc_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512-ONLY-LABEL: load_v1i3_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    movzbl (%rdi), %eax
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    andl $1, %eax
 ; AVX512-ONLY-NEXT:    kmovw %eax, %k0
 ; AVX512-ONLY-NEXT:    kmovw %k0, %eax
@@ -57,7 +57,7 @@ define void @load_v1i4_trunc_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512-ONLY-LABEL: load_v1i4_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    movzbl (%rdi), %eax
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    andl $1, %eax
 ; AVX512-ONLY-NEXT:    kmovw %eax, %k0
 ; AVX512-ONLY-NEXT:    kmovw %k0, %eax
@@ -79,7 +79,7 @@ define void @load_v1i8_trunc_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512-ONLY-LABEL: load_v1i8_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    movzbl (%rdi), %eax
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    andl $1, %eax
 ; AVX512-ONLY-NEXT:    kmovw %eax, %k0
 ; AVX512-ONLY-NEXT:    kmovw %k0, %eax
@@ -101,7 +101,7 @@ define void @load_v1i16_trunc_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512-ONLY-LABEL: load_v1i16_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    movzbl (%rdi), %eax
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    andl $1, %eax
 ; AVX512-ONLY-NEXT:    kmovw %eax, %k0
 ; AVX512-ONLY-NEXT:    kmovw %k0, %eax
@@ -123,7 +123,7 @@ define void @load_v1i32_trunc_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512-ONLY-LABEL: load_v1i32_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    movzbl (%rdi), %eax
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    andl $1, %eax
 ; AVX512-ONLY-NEXT:    kmovw %eax, %k0
 ; AVX512-ONLY-NEXT:    kmovw %k0, %eax
@@ -145,7 +145,7 @@ define void @load_v1i64_trunc_v1i1_store(ptr %a0,ptr %a1) {
 ;
 ; AVX512-ONLY-LABEL: load_v1i64_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    movzbl (%rdi), %eax
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    andl $1, %eax
 ; AVX512-ONLY-NEXT:    kmovw %eax, %k0
 ; AVX512-ONLY-NEXT:    kmovw %k0, %eax

diff  --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index f9d6ac8e9db12..53d681dbcf869 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -55,7 +55,7 @@ define i8 @mask8(i8 %x) {
 ;
 ; X86-LABEL: mask8:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    notb %al
 ; X86-NEXT:    retl
   %m0 = bitcast i8 %x to <8 x i1>
@@ -73,7 +73,7 @@ define i32 @mask8_zext(i8 %x) {
 ;
 ; X86-LABEL: mask8_zext:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    notb %al
 ; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    retl
@@ -277,7 +277,7 @@ define i8 @shuf_test1(i16 %v) nounwind {
 ;
 ; X86-LABEL: shuf_test1:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
    %v1 = bitcast i16 %v to <16 x i1>
    %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1996,7 +1996,7 @@ define void @store_i8_i1(i8 %x, ptr%y) {
 ; X86-LABEL: store_i8_i1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $1, %cl
 ; X86-NEXT:    movb %cl, (%eax)
 ; X86-NEXT:    retl
@@ -2951,7 +2951,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k1, %k2
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $10, %k1, %k1
@@ -2960,7 +2960,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $9, %k1, %k1
@@ -2970,7 +2970,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k1, %k3
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $8, %k1, %k1
@@ -2979,7 +2979,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $7, %k1, %k1
@@ -2989,7 +2989,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k1, %k4
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $6, %k1, %k1
@@ -2998,7 +2998,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $5, %k1, %k1
@@ -3006,7 +3006,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    movw $-2049, %ax ## imm = 0xF7FF
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $4, %k1, %k1
@@ -3015,7 +3015,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $3, %k1, %k1
@@ -3024,7 +3024,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $2, %k1, %k1
@@ -3033,121 +3033,121 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k0
 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL-NEXT:    kshiftrw $14, %k0, %k0
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    korw %k0, %k6, %k0
 ; KNL-NEXT:    kandw %k7, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k7, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $6, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $4, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $3, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $2, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k0
 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL-NEXT:    kshiftrw $14, %k0, %k0
@@ -3155,97 +3155,97 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    korw %k0, %k6, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k6, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k7, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k3, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $6, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k4, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $4, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $3, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k2, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $2, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $14, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
 ; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k0, %k0
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $14, %k6, %k6
@@ -3253,93 +3253,93 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; KNL-NEXT:    korw %k6, %k7, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $13, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $12, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $11, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $10, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $9, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $8, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k5, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $7, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kandw %k3, %k6, %k6
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $6, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
 ; KNL-NEXT:    kandw %k4, %k6, %k5
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k3, %k5, %k4
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL-NEXT:    kshiftrw $4, %k5, %k5
 ; KNL-NEXT:    korw %k5, %k4, %k4
 ; KNL-NEXT:    kandw %k1, %k4, %k3
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k4
 ; KNL-NEXT:    kshiftlw $15, %k4, %k4
 ; KNL-NEXT:    kshiftrw $3, %k4, %k4
 ; KNL-NEXT:    korw %k4, %k3, %k3
 ; KNL-NEXT:    kandw %k2, %k3, %k2
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k3
 ; KNL-NEXT:    kshiftlw $15, %k3, %k3
 ; KNL-NEXT:    kshiftrw $2, %k3, %k3
 ; KNL-NEXT:    korw %k3, %k2, %k2
 ; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; KNL-NEXT:    kandw %k1, %k2, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k2
 ; KNL-NEXT:    kshiftlw $14, %k2, %k2
 ; KNL-NEXT:    korw %k2, %k1, %k1
 ; KNL-NEXT:    kshiftlw $1, %k1, %k1
 ; KNL-NEXT:    kshiftrw $1, %k1, %k1
-; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; KNL-NEXT:    kmovw %eax, %k2
 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
 ; KNL-NEXT:    korw %k2, %k1, %k1
@@ -3405,7 +3405,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw %k1, %k2
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $10, %k1, %k1
@@ -3414,7 +3414,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $9, %k1, %k1
@@ -3424,7 +3424,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw %k1, %k3
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $8, %k1, %k1
@@ -3433,7 +3433,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $7, %k1, %k1
@@ -3443,7 +3443,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw %k1, %k4
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $6, %k1, %k1
@@ -3452,7 +3452,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $5, %k1, %k1
@@ -3460,7 +3460,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    movw $-2049, %ax ## imm = 0xF7FF
 ; AVX512DQ-NEXT:    kmovw %eax, %k5
 ; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $4, %k1, %k1
@@ -3469,7 +3469,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $3, %k1, %k1
@@ -3478,7 +3478,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $2, %k1, %k1
@@ -3487,121 +3487,121 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $14, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    andl $1, %eax
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; AVX512DQ-NEXT:    kmovw %ecx, %k0
 ; AVX512DQ-NEXT:    kshiftlw $15, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrw $14, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    korw %k0, %k6, %k0
 ; AVX512DQ-NEXT:    kandw %k7, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $13, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k7, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $12, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $11, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $10, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $9, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k3, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $8, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k3, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $7, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k4, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $6, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k4, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $5, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
 ; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $4, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k6, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $3, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k6, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $2, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k6, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $14, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    andl $1, %eax
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; AVX512DQ-NEXT:    kmovw %ecx, %k0
 ; AVX512DQ-NEXT:    kshiftlw $15, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrw $14, %k0, %k0
@@ -3609,97 +3609,97 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    korw %k0, %k6, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k6, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $13, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k7, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $12, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $11, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $10, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $9, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $8, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k3, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $7, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k3, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $6, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k4, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $5, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $4, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $3, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $2, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $14, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k0, %k0
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    andl $1, %eax
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; AVX512DQ-NEXT:    kmovw %ecx, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $14, %k6, %k6
@@ -3707,93 +3707,93 @@ define void @store_64i1(ptr %a, <64 x i1> %v) {
 ; AVX512DQ-NEXT:    korw %k6, %k7, %k6
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k7
 ; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQ-NEXT:    kshiftrw $13, %k7, %k7
 ; AVX512DQ-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k7
 ; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQ-NEXT:    kshiftrw $12, %k7, %k7
 ; AVX512DQ-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k7
 ; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQ-NEXT:    kshiftrw $11, %k7, %k7
 ; AVX512DQ-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k7
 ; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQ-NEXT:    kshiftrw $10, %k7, %k7
 ; AVX512DQ-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k7
 ; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQ-NEXT:    kshiftrw $9, %k7, %k7
 ; AVX512DQ-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k7
 ; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQ-NEXT:    kshiftrw $8, %k7, %k7
 ; AVX512DQ-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k7
 ; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQ-NEXT:    kshiftrw $7, %k7, %k7
 ; AVX512DQ-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQ-NEXT:    kandw %k3, %k6, %k6
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k7
 ; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQ-NEXT:    kshiftrw $6, %k7, %k7
 ; AVX512DQ-NEXT:    korw %k7, %k6, %k6
 ; AVX512DQ-NEXT:    kandw %k4, %k6, %k5
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k6
 ; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQ-NEXT:    kshiftrw $5, %k6, %k6
 ; AVX512DQ-NEXT:    korw %k6, %k5, %k5
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k3, %k5, %k4
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k5
 ; AVX512DQ-NEXT:    kshiftlw $15, %k5, %k5
 ; AVX512DQ-NEXT:    kshiftrw $4, %k5, %k5
 ; AVX512DQ-NEXT:    korw %k5, %k4, %k4
 ; AVX512DQ-NEXT:    kandw %k1, %k4, %k3
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k4
 ; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
 ; AVX512DQ-NEXT:    kshiftrw $3, %k4, %k4
 ; AVX512DQ-NEXT:    korw %k4, %k3, %k3
 ; AVX512DQ-NEXT:    kandw %k2, %k3, %k2
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k3
 ; AVX512DQ-NEXT:    kshiftlw $15, %k3, %k3
 ; AVX512DQ-NEXT:    kshiftrw $2, %k3, %k3
 ; AVX512DQ-NEXT:    korw %k3, %k2, %k2
 ; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
 ; AVX512DQ-NEXT:    kandw %k1, %k2, %k1
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k2
 ; AVX512DQ-NEXT:    kshiftlw $14, %k2, %k2
 ; AVX512DQ-NEXT:    korw %k2, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftlw $1, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrw $1, %k1, %k1
-; AVX512DQ-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k2
 ; AVX512DQ-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512DQ-NEXT:    korw %k2, %k1, %k1
@@ -3960,7 +3960,7 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) {
 ;
 ; X86-LABEL: test_v8i1_add:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
   %m0 = bitcast i8 %x to <8 x i1>
@@ -3980,7 +3980,7 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) {
 ;
 ; X86-LABEL: test_v8i1_sub:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
   %m0 = bitcast i8 %x to <8 x i1>
@@ -4000,7 +4000,7 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) {
 ;
 ; X86-LABEL: test_v8i1_mul:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
   %m0 = bitcast i8 %x to <8 x i1>
@@ -5132,7 +5132,7 @@ define i1 @test_v1i1_add(i1 %x, i1 %y) {
 ;
 ; X86-LABEL: test_v1i1_add:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
   %m0 = bitcast i1 %x to <1 x i1>
@@ -5152,7 +5152,7 @@ define i1 @test_v1i1_sub(i1 %x, i1 %y) {
 ;
 ; X86-LABEL: test_v1i1_sub:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
   %m0 = bitcast i1 %x to <1 x i1>
@@ -5172,7 +5172,7 @@ define i1 @test_v1i1_mul(i1 %x, i1 %y) {
 ;
 ; X86-LABEL: test_v1i1_mul:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
   %m0 = bitcast i1 %x to <1 x i1>

diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index 4c7633f48c69d..60bc63275bd47 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -131,7 +131,7 @@ define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
 define i8 @select05(i8 %a.0, i8 %m) {
 ; X86-LABEL: select05:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    orb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
@@ -206,7 +206,7 @@ define i8 @select05_mem(ptr %a.0, ptr %m) {
 define i8 @select06(i8 %a.0, i8 %m) {
 ; X86-LABEL: select06:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;

diff --git a/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll
index 170197816ae19..b497ff7739d2c 100644
--- a/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll
@@ -321,7 +321,7 @@ entry:
 define <4 x float> @test_mm128_maskz_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B, i4 zeroext %U) local_unnamed_addr #2 {
 ; X86-LABEL: test_mm128_maskz_dpbf16ps_128:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -340,7 +340,7 @@ entry:
 define <4 x float> @test_mm128_mask_dpbf16ps_128(i4 zeroext %U, <4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 {
 ; X86-LABEL: test_mm128_mask_dpbf16ps_128:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x09,0x52,0xc2]
 ; X86-NEXT:    retl # encoding: [0xc3]

diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
index ada2c8d53aa53..bff2107c2e447 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
@@ -111,7 +111,7 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kunpckdq %k1, %k0, %k1
 ; X86-NEXT:    vpbroadcastb %eax, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -136,7 +136,7 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A)  {
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kunpckdq %k1, %k0, %k1
 ; X86-NEXT:    vpbroadcastb %eax, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
index a32b84986e895..11dd987729d92 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
@@ -104,7 +104,7 @@ entry:
 define zeroext i8 @test_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_test_epi16_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
 ; X86-NEXT:    kmovd %k0, %eax
@@ -272,7 +272,7 @@ entry:
 define zeroext i8 @test_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_testn_epi16_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
 ; X86-NEXT:    kmovd %k0, %eax
@@ -343,7 +343,7 @@ entry:
 define <2 x i64> @test_mm_mask_set1_epi8(<2 x i64> %__O, i16 zeroext %__M, i8 signext %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm_mask_set1_epi8:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
 ; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -366,7 +366,7 @@ entry:
 define <2 x i64> @test_mm_maskz_set1_epi8(i16 zeroext %__M, i8 signext %__A)  {
 ; X86-LABEL: test_mm_maskz_set1_epi8:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
 ; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -388,7 +388,7 @@ entry:
 define <4 x i64> @test_mm256_mask_set1_epi8(<4 x i64> %__O, i32 %__M, i8 signext %__A){
 ; X86-LABEL: test_mm256_mask_set1_epi8:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -411,7 +411,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_set1_epi8(i32 %__M, i8 signext %__A)  {
 ; X86-LABEL: test_mm256_maskz_set1_epi8:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -479,7 +479,7 @@ define <2 x i64> @test_mm_mask_set1_epi16(<2 x i64> %__O, i8 zeroext %__M, i16 s
 ; X86-LABEL: test_mm_mask_set1_epi16:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovd %ecx, %k1
 ; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -503,7 +503,7 @@ define <2 x i64> @test_mm_maskz_set1_epi16(i8 zeroext %__M, i16 signext %__A) {
 ; X86-LABEL: test_mm_maskz_set1_epi16:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovd %ecx, %k1
 ; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -641,7 +641,7 @@ define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
 define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
 ; X86-LABEL: test_mm_mask_broadcastw_epi16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -663,7 +663,7 @@ define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64>
 define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) {
 ; X86-LABEL: test_mm_maskz_broadcastw_epi16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -809,7 +809,7 @@ entry:
 define <2 x i64> @test_mm_mask2_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask2_permutex2var_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
 ; X86-NEXT:    vmovdqa %xmm1, %xmm0
@@ -874,7 +874,7 @@ entry:
 define <2 x i64> @test_mm_mask_permutex2var_epi16(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_permutex2var_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -898,7 +898,7 @@ entry:
 define <2 x i64> @test_mm_maskz_permutex2var_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_permutex2var_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
index 4f69c9a676a08..2e4b2e0c5cc91 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
@@ -6174,7 +6174,7 @@ define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
 ; X86:       # %bb.0:
 ; X86-NEXT:    vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
 ; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -6283,7 +6283,7 @@ define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2
 ; X86:       # %bb.0:
 ; X86-NEXT:    vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
 ; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
 ; X86-NEXT:    retl # encoding: [0xc3]

diff --git a/llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll
index d54784c71992b..6b171848ee2a7 100644
--- a/llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll
@@ -17,7 +17,7 @@ entry:
 define <8 x i64> @test_mm512_mask_madd52hi_epu64(<8 x i64> %__W, i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y) {
 ; X86-LABEL: test_mm512_mask_madd52hi_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -37,7 +37,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_madd52hi_epu64(i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y, <8 x i64> %__Z) {
 ; X86-LABEL: test_mm512_maskz_madd52hi_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -67,7 +67,7 @@ entry:
 define <8 x i64> @test_mm512_mask_madd52lo_epu64(<8 x i64> %__W, i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y) {
 ; X86-LABEL: test_mm512_mask_madd52lo_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -87,7 +87,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_madd52lo_epu64(i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y, <8 x i64> %__Z) {
 ; X86-LABEL: test_mm512_maskz_madd52lo_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll
index 4ee81405f608d..576980f695346 100644
--- a/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll
@@ -17,7 +17,7 @@ entry:
 define <2 x i64> @test_mm_mask_madd52hi_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
 ; X86-LABEL: test_mm_mask_madd52hi_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -38,7 +38,7 @@ entry:
 define <2 x i64> @test_mm_maskz_madd52hi_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
 ; X86-LABEL: test_mm_maskz_madd52hi_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -69,7 +69,7 @@ entry:
 define <4 x i64> @test_mm256_mask_madd52hi_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
 ; X86-LABEL: test_mm256_mask_madd52hi_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -90,7 +90,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_madd52hi_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
 ; X86-LABEL: test_mm256_maskz_madd52hi_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -121,7 +121,7 @@ entry:
 define <2 x i64> @test_mm_mask_madd52lo_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
 ; X86-LABEL: test_mm_mask_madd52lo_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -142,7 +142,7 @@ entry:
 define <2 x i64> @test_mm_maskz_madd52lo_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
 ; X86-LABEL: test_mm_maskz_madd52lo_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -173,7 +173,7 @@ entry:
 define <4 x i64> @test_mm256_mask_madd52lo_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
 ; X86-LABEL: test_mm256_mask_madd52lo_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -194,7 +194,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_madd52lo_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
 ; X86-LABEL: test_mm256_maskz_madd52lo_epu64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll
index a05b2a36183cc..cc528d0acc54d 100644
--- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll
@@ -303,7 +303,7 @@ entry:
 define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_shldi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -325,7 +325,7 @@ declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
 define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_shldi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -475,7 +475,7 @@ entry:
 define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_shrdi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -497,7 +497,7 @@ declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
 define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_shrdi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -647,7 +647,7 @@ entry:
 define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_shldv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -667,7 +667,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_shldv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -817,7 +817,7 @@ entry:
 define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_shrdv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
@@ -837,7 +837,7 @@ entry:
 define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_shrdv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll
index 4e69fcdc2fd07..5094180f46a81 100644
--- a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll
@@ -7,7 +7,7 @@
 define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
 ; X86-LABEL: test_mm_mask_compress_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpcompressw %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -28,7 +28,7 @@ entry:
 define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) {
 ; X86-LABEL: test_mm_maskz_compress_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -87,7 +87,7 @@ entry:
 define void @test_mm_mask_compressstoreu_epi16(ptr %__P, i8 zeroext %__U, <2 x i64> %__D) {
 ; X86-LABEL: test_mm_mask_compressstoreu_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpcompressw %xmm0, (%ecx) {%k1}
@@ -128,7 +128,7 @@ entry:
 define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
 ; X86-LABEL: test_mm_mask_expand_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpexpandw %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -149,7 +149,7 @@ entry:
 define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) {
 ; X86-LABEL: test_mm_maskz_expand_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -209,7 +209,7 @@ define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U
 ; X86-LABEL: test_mm_mask_expandloadu_epi16:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovd %ecx, %k1
 ; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -231,7 +231,7 @@ define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, ptr readonly
 ; X86-LABEL: test_mm_maskz_expandloadu_epi16:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovd %ecx, %k1
 ; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -574,7 +574,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shldi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -597,7 +597,7 @@ declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
 define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shldi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -628,7 +628,7 @@ entry:
 define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shldi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -651,7 +651,7 @@ declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
 define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shldi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -682,7 +682,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shldi_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -708,7 +708,7 @@ declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
 define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shldi_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -744,7 +744,7 @@ entry:
 define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shldi_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -771,7 +771,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shldi_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -868,7 +868,7 @@ entry:
 define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shldi_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -894,7 +894,7 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
 define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shldi_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -930,7 +930,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shrdi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -953,7 +953,7 @@ declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
 define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shrdi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -984,7 +984,7 @@ entry:
 define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shrdi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1007,7 +1007,7 @@ declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
 define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shrdi_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1038,7 +1038,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shrdi_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1064,7 +1064,7 @@ declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
 define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shrdi_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1100,7 +1100,7 @@ entry:
 define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shrdi_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1127,7 +1127,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shrdi_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1224,7 +1224,7 @@ entry:
 define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shrdi_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1250,7 +1250,7 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
 define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shrdi_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1286,7 +1286,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shldv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1307,7 +1307,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shldv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1338,7 +1338,7 @@ entry:
 define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shldv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1359,7 +1359,7 @@ entry:
 define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shldv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1390,7 +1390,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shldv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1414,7 +1414,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shldv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1452,7 +1452,7 @@ entry:
 define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shldv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1477,7 +1477,7 @@ entry:
 define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shldv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1576,7 +1576,7 @@ entry:
 define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shldv_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1600,7 +1600,7 @@ entry:
 define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shldv_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1638,7 +1638,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shrdv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1659,7 +1659,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shrdv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1690,7 +1690,7 @@ entry:
 define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shrdv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1711,7 +1711,7 @@ entry:
 define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shrdv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1742,7 +1742,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shrdv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1766,7 +1766,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shrdv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1804,7 +1804,7 @@ entry:
 define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shrdv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1829,7 +1829,7 @@ entry:
 define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shrdv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1928,7 +1928,7 @@ entry:
 define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_shrdv_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1952,7 +1952,7 @@ entry:
 define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_shrdv_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovd %eax, %k1
 ; X86-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
index 173e2bad8aceb..b1281338ec56c 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
@@ -7,7 +7,7 @@
 define <4 x float> @test_mm_mask_cvtepi32_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_cvtepi32_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtdq2ps %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -29,7 +29,7 @@ entry:
 define <4 x float> @test_mm_maskz_cvtepi32_ps(i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_cvtepi32_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -51,7 +51,7 @@ entry:
 define <8 x float> @test_mm256_mask_cvtepi32_ps(<8 x float> %__W, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtepi32_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtdq2ps %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -72,7 +72,7 @@ entry:
 define <8 x float> @test_mm256_maskz_cvtepi32_ps(i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtepi32_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtdq2ps %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -93,7 +93,7 @@ entry:
 define <2 x i64> @test_mm_mask_cvtpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_mask_cvtpd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2dq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -113,7 +113,7 @@ entry:
 define <2 x i64> @test_mm_maskz_cvtpd_epi32(i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_maskz_cvtpd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2dq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -132,7 +132,7 @@ entry:
 define <2 x i64> @test_mm256_mask_cvtpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtpd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2dq %ymm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
@@ -157,7 +157,7 @@ entry:
 define <2 x i64> @test_mm256_maskz_cvtpd_epi32(i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtpd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2dq %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
@@ -181,7 +181,7 @@ entry:
 define <4 x float> @test_mm_mask_cvtpd_ps(<4 x float> %__W, i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_mask_cvtpd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2ps %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -199,7 +199,7 @@ entry:
 define <4 x float> @test_mm_maskz_cvtpd_ps(i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_maskz_cvtpd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2ps %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -217,7 +217,7 @@ entry:
 define <4 x float> @test_mm256_mask_cvtpd_ps(<4 x float> %__W, i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtpd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2ps %ymm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
@@ -240,7 +240,7 @@ entry:
 define <4 x float> @test_mm256_maskz_cvtpd_ps(i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtpd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
@@ -274,7 +274,7 @@ entry:
 define <2 x i64> @test_mm_mask_cvtpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_mask_cvtpd_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2udq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -294,7 +294,7 @@ entry:
 define <2 x i64> @test_mm_maskz_cvtpd_epu32(i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_maskz_cvtpd_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2udq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -325,7 +325,7 @@ entry:
 define <2 x i64> @test_mm256_mask_cvtpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtpd_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2udq %ymm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
@@ -347,7 +347,7 @@ entry:
 define <2 x i64> @test_mm256_maskz_cvtpd_epu32(i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtpd_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtpd2udq %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
@@ -368,7 +368,7 @@ entry:
 define <4 x float> @test_mm_mask_cvtph_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_cvtph_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtph2ps %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -392,7 +392,7 @@ entry:
 define <4 x float> @test_mm_maskz_cvtph_ps(i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_cvtph_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtph2ps %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -416,7 +416,7 @@ entry:
 define <8 x float> @test_mm256_mask_cvtph_ps(<8 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtph_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtph2ps %xmm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -438,7 +438,7 @@ entry:
 define <8 x float> @test_mm256_maskz_cvtph_ps(i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtph_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtph2ps %xmm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -460,7 +460,7 @@ entry:
 define <2 x i64> @test_mm_mask_cvtps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_cvtps_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2dq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -483,7 +483,7 @@ entry:
 define <2 x i64> @test_mm_maskz_cvtps_epi32(i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_maskz_cvtps_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2dq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -505,7 +505,7 @@ entry:
 define <4 x i64> @test_mm256_mask_cvtps_epi32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtps_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2dq %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -527,7 +527,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_cvtps_epi32(i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtps_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2dq %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -548,7 +548,7 @@ entry:
 define <2 x double> @test_mm_mask_cvtps_pd(<2 x double> %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm_mask_cvtps_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2pd %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -570,7 +570,7 @@ entry:
 define <2 x double> @test_mm_maskz_cvtps_pd(i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm_maskz_cvtps_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2pd %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -592,7 +592,7 @@ entry:
 define <4 x double> @test_mm256_mask_cvtps_pd(<4 x double> %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm256_mask_cvtps_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2pd %xmm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -613,7 +613,7 @@ entry:
 define <4 x double> @test_mm256_maskz_cvtps_pd(i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm256_maskz_cvtps_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2pd %xmm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -645,7 +645,7 @@ entry:
 define <2 x i64> @test_mm_mask_cvtps_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_cvtps_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2udq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -665,7 +665,7 @@ entry:
 define <2 x i64> @test_mm_maskz_cvtps_epu32(i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_maskz_cvtps_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2udq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -695,7 +695,7 @@ entry:
 define <4 x i64> @test_mm256_mask_cvtps_epu32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtps_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2udq %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -715,7 +715,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_cvtps_epu32(i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtps_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtps2udq %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -734,7 +734,7 @@ entry:
 define <2 x i64> @test_mm_mask_cvttpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_mask_cvttpd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttpd2dq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -754,7 +754,7 @@ entry:
 define <2 x i64> @test_mm_maskz_cvttpd_epi32(i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_maskz_cvttpd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttpd2dq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -773,7 +773,7 @@ entry:
 define <2 x i64> @test_mm256_mask_cvttpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_cvttpd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttpd2dq %ymm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
@@ -798,7 +798,7 @@ entry:
 define <2 x i64> @test_mm256_maskz_cvttpd_epi32(i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvttpd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttpd2dq %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
@@ -833,7 +833,7 @@ entry:
 define <2 x i64> @test_mm_mask_cvttpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_mask_cvttpd_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttpd2udq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -853,7 +853,7 @@ entry:
 define <2 x i64> @test_mm_maskz_cvttpd_epu32(i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_maskz_cvttpd_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttpd2udq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -884,7 +884,7 @@ entry:
 define <2 x i64> @test_mm256_mask_cvttpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_cvttpd_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttpd2udq %ymm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
@@ -906,7 +906,7 @@ entry:
 define <2 x i64> @test_mm256_maskz_cvttpd_epu32(i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvttpd_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttpd2udq %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
@@ -927,7 +927,7 @@ entry:
 define <2 x i64> @test_mm_mask_cvttps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_cvttps_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttps2dq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -950,7 +950,7 @@ entry:
 define <2 x i64> @test_mm_maskz_cvttps_epi32(i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_maskz_cvttps_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttps2dq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -972,7 +972,7 @@ entry:
 define <4 x i64> @test_mm256_mask_cvttps_epi32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_mask_cvttps_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttps2dq %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -994,7 +994,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_cvttps_epi32(i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvttps_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttps2dq %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1026,7 +1026,7 @@ entry:
 define <2 x i64> @test_mm_mask_cvttps_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_cvttps_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttps2udq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1046,7 +1046,7 @@ entry:
 define <2 x i64> @test_mm_maskz_cvttps_epu32(i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_maskz_cvttps_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttps2udq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1076,7 +1076,7 @@ entry:
 define <4 x i64> @test_mm256_mask_cvttps_epu32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_mask_cvttps_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttps2udq %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1096,7 +1096,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_cvttps_epu32(i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvttps_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvttps2udq %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1127,7 +1127,7 @@ entry:
 define <2 x double> @test_mm_mask_cvtepu32_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm_mask_cvtepu32_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtudq2pd %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1150,7 +1150,7 @@ entry:
 define <2 x double> @test_mm_maskz_cvtepu32_pd(i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm_maskz_cvtepu32_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1184,7 +1184,7 @@ entry:
 define <4 x double> @test_mm256_mask_cvtepu32_pd(<4 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm256_mask_cvtepu32_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtudq2pd %xmm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1206,7 +1206,7 @@ entry:
 define <4 x double> @test_mm256_maskz_cvtepu32_pd(i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm256_maskz_cvtepu32_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtudq2pd %xmm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1239,7 +1239,7 @@ entry:
 define <4 x float> @test_mm_mask_cvtepu32_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_cvtepu32_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtudq2ps %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1261,7 +1261,7 @@ entry:
 define <4 x float> @test_mm_maskz_cvtepu32_ps(i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_cvtepu32_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtudq2ps %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1294,7 +1294,7 @@ entry:
 define <8 x float> @test_mm256_mask_cvtepu32_ps(<8 x float> %__W, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtepu32_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtudq2ps %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1315,7 +1315,7 @@ entry:
 define <8 x float> @test_mm256_maskz_cvtepu32_ps(i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtepu32_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcvtudq2ps %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1346,7 +1346,7 @@ entry:
 define <8 x float> @test_mm256_mask_shuffle_f32x4(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; X86-LABEL: test_mm256_mask_shuffle_f32x4:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} = ymm1[4,5,6,7],ymm2[4,5,6,7]
 ; X86-NEXT:    retl
@@ -1366,7 +1366,7 @@ entry:
 define <8 x float> @test_mm256_maskz_shuffle_f32x4(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
 ; X86-LABEL: test_mm256_maskz_shuffle_f32x4:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
 ; X86-NEXT:    retl
@@ -1396,7 +1396,7 @@ entry:
 define <4 x double> @test_mm256_mask_shuffle_f64x2(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; X86-LABEL: test_mm256_mask_shuffle_f64x2:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} = ymm1[2,3],ymm2[2,3]
 ; X86-NEXT:    retl
@@ -1417,7 +1417,7 @@ entry:
 define <4 x double> @test_mm256_maskz_shuffle_f64x2(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; X86-LABEL: test_mm256_maskz_shuffle_f64x2:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
 ; X86-NEXT:    retl
@@ -1448,7 +1448,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shuffle_i32x4(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shuffle_i32x4:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} = ymm1[4,5,6,7],ymm2[4,5,6,7]
 ; X86-NEXT:    retl
@@ -1471,7 +1471,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_shuffle_i32x4(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shuffle_i32x4:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
 ; X86-NEXT:    retl
@@ -1503,7 +1503,7 @@ entry:
 define <4 x i64> @test_mm256_mask_shuffle_i64x2(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_shuffle_i64x2:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} = ymm1[2,3],ymm2[2,3]
 ; X86-NEXT:    retl
@@ -1524,7 +1524,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_shuffle_i64x2(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_shuffle_i64x2:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
 ; X86-NEXT:    retl
@@ -1561,7 +1561,7 @@ entry:
 define zeroext i8 @test_mm_mask_test_epi32_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_test_epi32_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestmd %xmm0, %xmm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -1606,7 +1606,7 @@ entry:
 define zeroext i8 @test_mm256_mask_test_epi32_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_test_epi32_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestmd %ymm0, %ymm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -1650,7 +1650,7 @@ entry:
 define zeroext i8 @test_mm_mask_test_epi64_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_test_epi64_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestmq %xmm0, %xmm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -1694,7 +1694,7 @@ entry:
 define zeroext i8 @test_mm256_mask_test_epi64_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_test_epi64_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestmq %ymm0, %ymm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -1740,7 +1740,7 @@ entry:
 define zeroext i8 @test_mm_mask_testn_epi32_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_testn_epi32_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestnmd %xmm0, %xmm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -1785,7 +1785,7 @@ entry:
 define zeroext i8 @test_mm256_mask_testn_epi32_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_testn_epi32_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestnmd %ymm0, %ymm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -1829,7 +1829,7 @@ entry:
 define zeroext i8 @test_mm_mask_testn_epi64_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_testn_epi64_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestnmq %xmm0, %xmm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -1873,7 +1873,7 @@ entry:
 define zeroext i8 @test_mm256_mask_testn_epi64_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_testn_epi64_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestnmq %ymm0, %ymm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
@@ -1903,7 +1903,7 @@ entry:
 define <2 x i64> @test_mm_mask_set1_epi32(<2 x i64> %__O, i8 zeroext %__M)  {
 ; X86-LABEL: test_mm_mask_set1_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -1925,7 +1925,7 @@ entry:
 define <2 x i64> @test_mm_maskz_set1_epi32(i8 zeroext %__M) {
 ; X86-LABEL: test_mm_maskz_set1_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1946,7 +1946,7 @@ entry:
 define <4 x i64> @test_mm256_mask_set1_epi32(<4 x i64> %__O, i8 zeroext %__M)  {
 ; X86-LABEL: test_mm256_mask_set1_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -1967,7 +1967,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_set1_epi32(i8 zeroext %__M)  {
 ; X86-LABEL: test_mm256_maskz_set1_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -1987,7 +1987,7 @@ entry:
 define <2 x i64> @test_mm_mask_set1_epi64(<2 x i64> %__O, i8 zeroext %__M, i64 %__A)  {
 ; X86-LABEL: test_mm_mask_set1_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
 ; X86-NEXT:    kmovw %eax, %k1
@@ -2011,7 +2011,7 @@ entry:
 define <2 x i64> @test_mm_maskz_set1_epi64(i8 zeroext %__M, i64 %__A)  {
 ; X86-LABEL: test_mm_maskz_set1_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; X86-NEXT:    kmovw %eax, %k1
@@ -2036,7 +2036,7 @@ entry:
 define <4 x i64> @test_mm256_mask_set1_epi64(<4 x i64> %__O, i8 zeroext %__M, i64 %__A) {
 ; X86-LABEL: test_mm256_mask_set1_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
 ; X86-NEXT:    kmovw %eax, %k1
@@ -2060,7 +2060,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_set1_epi64(i8 zeroext %__M, i64 %__A)  {
 ; X86-LABEL: test_mm256_maskz_set1_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; X86-NEXT:    kmovw %eax, %k1
@@ -2095,7 +2095,7 @@ define <2 x i64> @test_mm_broadcastd_epi32(<2 x i64> %a0) {
 define <2 x i64> @test_mm_mask_broadcastd_epi32(<2 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_broadcastd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2119,7 +2119,7 @@ entry:
 define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 zeroext %__M, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_broadcastd_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2153,7 +2153,7 @@ define <4 x i64> @test_mm256_broadcastd_epi32(<2 x i64> %a0) {
 define <4 x i64> @test_mm256_mask_broadcastd_epi32(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) {
 ; X86-LABEL: test_mm256_mask_broadcastd_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd %xmm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -2175,7 +2175,7 @@ define <4 x i64> @test_mm256_mask_broadcastd_epi32(<4 x i64> %a0, i8 %a1, <2 x i
 define <4 x i64> @test_mm256_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
 ; X86-LABEL: test_mm256_maskz_broadcastd_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2205,7 +2205,7 @@ define <2 x i64> @test_mm_broadcastq_epi64(<2 x i64> %a0) {
 define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_broadcastq_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastq %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2226,7 +2226,7 @@ entry:
 define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 zeroext %__M, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_broadcastq_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2256,7 +2256,7 @@ define <4 x i64> @test_mm256_broadcastq_epi64(<2 x i64> %a0) {
 define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_broadcastq_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastq %xmm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -2277,7 +2277,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 zeroext %__M, <2 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_broadcastq_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2307,7 +2307,7 @@ define <4 x double> @test_mm256_broadcastsd_pd(<2 x double> %a0) {
 define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %__O, i8 zeroext %__M, <2 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_broadcastsd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -2328,7 +2328,7 @@ entry:
 define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 zeroext %__M, <2 x double> %__A) {
 ; X86-LABEL: test_mm256_maskz_broadcastsd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2358,7 +2358,7 @@ define <4 x float> @test_mm_broadcastss_ps(<4 x float> %a0) {
 define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %__O, i8 zeroext %__M, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_broadcastss_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -2379,7 +2379,7 @@ entry:
 define <4 x float> @test_mm_maskz_broadcastss_ps(i8 zeroext %__M, <4 x float> %__A) {
 ; X86-LABEL: test_mm_maskz_broadcastss_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2409,7 +2409,7 @@ define <8 x float> @test_mm256_broadcastss_ps(<4 x float> %a0) {
 define <8 x float> @test_mm256_mask_broadcastss_ps(<8 x float> %a0, i8 %a1, <4 x float> %a2) {
 ; X86-LABEL: test_mm256_mask_broadcastss_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss %xmm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -2428,7 +2428,7 @@ define <8 x float> @test_mm256_mask_broadcastss_ps(<8 x float> %a0, i8 %a1, <4 x
 define <8 x float> @test_mm256_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) {
 ; X86-LABEL: test_mm256_maskz_broadcastss_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -2456,7 +2456,7 @@ define <2 x double> @test_mm_movddup_pd(<2 x double> %a0) {
 define <2 x double> @test_mm_mask_movedup_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_mask_movedup_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
 ; X86-NEXT:    retl
@@ -2477,7 +2477,7 @@ entry:
 define <2 x double> @test_mm_maskz_movedup_pd(i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_maskz_movedup_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
 ; X86-NEXT:    retl
@@ -2507,7 +2507,7 @@ define <4 x double> @test_mm256_movddup_pd(<4 x double> %a0) {
 define <4 x double> @test_mm256_mask_movedup_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_movedup_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
 ; X86-NEXT:    retl
@@ -2528,7 +2528,7 @@ entry:
 define <4 x double> @test_mm256_maskz_movedup_pd(i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_maskz_movedup_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
 ; X86-NEXT:    retl
@@ -2558,7 +2558,7 @@ define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) {
 define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_movehdup_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
 ; X86-NEXT:    retl
@@ -2579,7 +2579,7 @@ entry:
 define <4 x float> @test_mm_maskz_movehdup_ps(i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_maskz_movehdup_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
 ; X86-NEXT:    retl
@@ -2609,7 +2609,7 @@ define <8 x float> @test_mm256_movehdup_ps(<8 x float> %a0) {
 define <8 x float> @test_mm256_mask_movehdup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) {
 ; X86-LABEL: test_mm256_mask_movehdup_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
 ; X86-NEXT:    retl
@@ -2628,7 +2628,7 @@ define <8 x float> @test_mm256_mask_movehdup_ps(<8 x float> %a0, i8 %a1, <8 x fl
 define <8 x float> @test_mm256_maskz_movehdup_ps(i8 %a0, <8 x float> %a1) {
 ; X86-LABEL: test_mm256_maskz_movehdup_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
 ; X86-NEXT:    retl
@@ -2656,7 +2656,7 @@ define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) {
 define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_moveldup_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
 ; X86-NEXT:    retl
@@ -2677,7 +2677,7 @@ entry:
 define <4 x float> @test_mm_maskz_moveldup_ps(i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_maskz_moveldup_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
 ; X86-NEXT:    retl
@@ -2707,7 +2707,7 @@ define <8 x float> @test_mm256_moveldup_ps(<8 x float> %a0) {
 define <8 x float> @test_mm256_mask_moveldup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) {
 ; X86-LABEL: test_mm256_mask_moveldup_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
 ; X86-NEXT:    retl
@@ -2726,7 +2726,7 @@ define <8 x float> @test_mm256_mask_moveldup_ps(<8 x float> %a0, i8 %a1, <8 x fl
 define <8 x float> @test_mm256_maskz_moveldup_ps(i8 %a0, <8 x float> %a1) {
 ; X86-LABEL: test_mm256_maskz_moveldup_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
 ; X86-NEXT:    retl
@@ -2754,7 +2754,7 @@ define <4 x i64> @test_mm256_permutex_epi64(<4 x i64> %a0) {
 define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X) {
 ; X86-LABEL: test_mm256_mask_permutex_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = ymm1[3,0,0,0]
 ; X86-NEXT:    retl
@@ -2775,7 +2775,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 zeroext %__M, <4 x i64> %__X) {
 ; X86-LABEL: test_mm256_maskz_permutex_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0]
 ; X86-NEXT:    retl
@@ -2805,7 +2805,7 @@ define <4 x double> @test_mm256_permutex_pd(<4 x double> %a0) {
 define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__X) {
 ; X86-LABEL: test_mm256_mask_permutex_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
 ; X86-NEXT:    retl
@@ -2826,7 +2826,7 @@ entry:
 define <4 x double> @test_mm256_maskz_permutex_pd(i8 zeroext %__U, <4 x double> %__X) {
 ; X86-LABEL: test_mm256_maskz_permutex_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
 ; X86-NEXT:    retl
@@ -2856,7 +2856,7 @@ define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
 define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_shuffle_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
 ; X86-NEXT:    retl
@@ -2877,7 +2877,7 @@ entry:
 define <2 x double> @test_mm_maskz_shuffle_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_shuffle_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
 ; X86-NEXT:    retl
@@ -2907,7 +2907,7 @@ define <4 x double> @test_mm256_shuffle_pd(<4 x double> %a0, <4 x double> %a1) {
 define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; X86-LABEL: test_mm256_mask_shuffle_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
 ; X86-NEXT:    retl
@@ -2928,7 +2928,7 @@ entry:
 define <4 x double> @test_mm256_maskz_shuffle_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
 ; X86-LABEL: test_mm256_maskz_shuffle_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
 ; X86-NEXT:    retl
@@ -2958,7 +2958,7 @@ define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) {
 define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_shuffle_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
 ; X86-NEXT:    retl
@@ -2979,7 +2979,7 @@ entry:
 define <4 x float> @test_mm_maskz_shuffle_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_shuffle_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
 ; X86-NEXT:    retl
@@ -3009,7 +3009,7 @@ define <8 x float> @test_mm256_shuffle_ps(<8 x float> %a0, <8 x float> %a1) {
 define <8 x float> @test_mm256_mask_shuffle_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2, <8 x float> %a3) {
 ; X86-LABEL: test_mm256_mask_shuffle_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufps {{.*#+}} ymm0 {%k1} = ymm1[0,1],ymm2[0,0],ymm1[4,5],ymm2[4,4]
 ; X86-NEXT:    retl
@@ -3028,7 +3028,7 @@ define <8 x float> @test_mm256_mask_shuffle_ps(<8 x float> %a0, i8 %a1, <8 x flo
 define <8 x float> @test_mm256_maskz_shuffle_ps(i8 %a0, <8 x float> %a1, <8 x float> %a2) {
 ; X86-LABEL: test_mm256_maskz_shuffle_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
 ; X86-NEXT:    retl
@@ -3047,7 +3047,7 @@ define <8 x float> @test_mm256_maskz_shuffle_ps(i8 %a0, <8 x float> %a1, <8 x fl
 define <4 x i64> @test_mm256_mask_mul_epi32(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind {
 ; X86-LABEL: test_mm256_mask_mul_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuldq %ymm1, %ymm2, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -3072,7 +3072,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_mul_epi32(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind {
 ; X86-LABEL: test_mm256_maskz_mul_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuldq %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3096,7 +3096,7 @@ define <4 x i64> @test_mm256_maskz_mul_epi32(i8 zeroext %__M, <4 x i64> %__X, <4
 define <2 x i64> @test_mm_mask_mul_epi32(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind {
 ; X86-LABEL: test_mm_mask_mul_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuldq %xmm1, %xmm2, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -3120,7 +3120,7 @@ define <2 x i64> @test_mm_mask_mul_epi32(<2 x i64> %__W, i8 zeroext %__M, <2 x i
 define <2 x i64> @test_mm_maskz_mul_epi32(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind {
 ; X86-LABEL: test_mm_maskz_mul_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuldq %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3144,7 +3144,7 @@ define <2 x i64> @test_mm_maskz_mul_epi32(i8 zeroext %__M, <2 x i64> %__X, <2 x
 define <4 x i64> @test_mm256_mask_mul_epu32(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind {
 ; X86-LABEL: test_mm256_mask_mul_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuludq %ymm1, %ymm2, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -3167,7 +3167,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_mul_epu32(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind {
 ; X86-LABEL: test_mm256_maskz_mul_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3190,7 +3190,7 @@ entry:
 define <2 x i64> @test_mm_mask_mul_epu32(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind {
 ; X86-LABEL: test_mm_mask_mul_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuludq %xmm1, %xmm2, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -3213,7 +3213,7 @@ entry:
 define <2 x i64> @test_mm_maskz_mul_epu32(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind {
 ; X86-LABEL: test_mm_maskz_mul_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3311,7 +3311,7 @@ entry:
 define <2 x i64> @test_mm256_mask_cvtepi32_epi16(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtepi32_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovdw %ymm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
@@ -3334,7 +3334,7 @@ entry:
 define <2 x i64> @test_mm256_maskz_cvtepi32_epi16(i8 zeroext %__M, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtepi32_epi16:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovdw %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
@@ -3368,7 +3368,7 @@ entry:
 define <2 x i64> @test_mm256_mask_cvtepi64_epi32(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_cvtepi64_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovqd %ymm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
@@ -3393,7 +3393,7 @@ entry:
 define <2 x i64> @test_mm256_maskz_cvtepi64_epi32(i8 zeroext %__M, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_cvtepi64_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovqd %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
@@ -3473,7 +3473,7 @@ declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32
 define <2 x i64> @test_mm_mask_ternarylogic_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
 ; X86-LABEL: test_mm_mask_ternarylogic_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -3498,7 +3498,7 @@ entry:
 define <2 x i64> @test_mm_maskz_ternarylogic_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
 ; X86-LABEL: test_mm_maskz_ternarylogic_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3539,7 +3539,7 @@ declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32
 define <4 x i64> @test_mm256_mask_ternarylogic_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
 ; X86-LABEL: test_mm256_mask_ternarylogic_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -3563,7 +3563,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_ternarylogic_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
 ; X86-LABEL: test_mm256_maskz_ternarylogic_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3599,7 +3599,7 @@ declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64
 define <2 x i64> @test_mm_mask_ternarylogic_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
 ; X86-LABEL: test_mm_mask_ternarylogic_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -3620,7 +3620,7 @@ entry:
 define <2 x i64> @test_mm_maskz_ternarylogic_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
 ; X86-LABEL: test_mm_maskz_ternarylogic_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3653,7 +3653,7 @@ declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64
 define <4 x i64> @test_mm256_mask_ternarylogic_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
 ; X86-LABEL: test_mm256_mask_ternarylogic_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -3674,7 +3674,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_ternarylogic_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
 ; X86-LABEL: test_mm256_maskz_ternarylogic_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3695,7 +3695,7 @@ entry:
 define <2 x i64> @test_mm_mask2_permutex2var_epi32(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask2_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1}
 ; X86-NEXT:    vmovdqa %xmm1, %xmm0
@@ -3722,7 +3722,7 @@ entry:
 define <4 x i64> @test_mm256_mask2_permutex2var_epi32(<4 x i64> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask2_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1}
 ; X86-NEXT:    vmovdqa %ymm1, %ymm0
@@ -3748,7 +3748,7 @@ entry:
 define <2 x double> @test_mm_mask2_permutex2var_pd(<2 x double> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask2_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1}
 ; X86-NEXT:    vmovapd %xmm1, %xmm0
@@ -3772,7 +3772,7 @@ entry:
 define <4 x double> @test_mm256_mask2_permutex2var_pd(<4 x double> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x double> %__B) {
 ; X86-LABEL: test_mm256_mask2_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1}
 ; X86-NEXT:    vmovapd %ymm1, %ymm0
@@ -3796,7 +3796,7 @@ entry:
 define <4 x float> @test_mm_mask2_permutex2var_ps(<4 x float> %__A, <2 x i64> %__I, i8 zeroext %__U, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask2_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1}
 ; X86-NEXT:    vmovaps %xmm1, %xmm0
@@ -3821,7 +3821,7 @@ entry:
 define <8 x float> @test_mm256_mask2_permutex2var_ps(<8 x float> %__A, <4 x i64> %__I, i8 zeroext %__U, <8 x float> %__B) {
 ; X86-LABEL: test_mm256_mask2_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1}
 ; X86-NEXT:    vmovaps %ymm1, %ymm0
@@ -3845,7 +3845,7 @@ entry:
 define <2 x i64> @test_mm_mask2_permutex2var_epi64(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask2_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1}
 ; X86-NEXT:    vmovdqa %xmm1, %xmm0
@@ -3868,7 +3868,7 @@ entry:
 define <4 x i64> @test_mm256_mask2_permutex2var_epi64(<4 x i64> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask2_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1}
 ; X86-NEXT:    vmovdqa %ymm1, %ymm0
@@ -3905,7 +3905,7 @@ entry:
 define <2 x i64> @test_mm_mask_permutex2var_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2d %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -3930,7 +3930,7 @@ entry:
 define <2 x i64> @test_mm_maskz_permutex2var_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2d %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -3969,7 +3969,7 @@ entry:
 define <4 x i64> @test_mm256_mask_permutex2var_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2d %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -3993,7 +3993,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_permutex2var_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2d %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4027,7 +4027,7 @@ entry:
 define <2 x double> @test_mm_mask_permutex2var_pd(<2 x double> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x double> %__B) {
 ; X86-LABEL: test_mm_mask_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -4048,7 +4048,7 @@ entry:
 define <2 x double> @test_mm_maskz_permutex2var_pd(i8 zeroext %__U, <2 x double> %__A, <2 x i64> %__I, <2 x double> %__B) {
 ; X86-LABEL: test_mm_maskz_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4079,7 +4079,7 @@ entry:
 define <4 x double> @test_mm256_mask_permutex2var_pd(<4 x double> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x double> %__B) {
 ; X86-LABEL: test_mm256_mask_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -4100,7 +4100,7 @@ entry:
 define <4 x double> @test_mm256_maskz_permutex2var_pd(i8 zeroext %__U, <4 x double> %__A, <4 x i64> %__I, <4 x double> %__B) {
 ; X86-LABEL: test_mm256_maskz_permutex2var_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4132,7 +4132,7 @@ entry:
 define <4 x float> @test_mm_mask_permutex2var_ps(<4 x float> %__A, i8 zeroext %__U, <2 x i64> %__I, <4 x float> %__B) {
 ; X86-LABEL: test_mm_mask_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -4154,7 +4154,7 @@ entry:
 define <4 x float> @test_mm_maskz_permutex2var_ps(i8 zeroext %__U, <4 x float> %__A, <2 x i64> %__I, <4 x float> %__B) {
 ; X86-LABEL: test_mm_maskz_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4187,7 +4187,7 @@ entry:
 define <8 x float> @test_mm256_mask_permutex2var_ps(<8 x float> %__A, i8 zeroext %__U, <4 x i64> %__I, <8 x float> %__B) {
 ; X86-LABEL: test_mm256_mask_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -4208,7 +4208,7 @@ entry:
 define <8 x float> @test_mm256_maskz_permutex2var_ps(i8 zeroext %__U, <8 x float> %__A, <4 x i64> %__I, <8 x float> %__B) {
 ; X86-LABEL: test_mm256_maskz_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4239,7 +4239,7 @@ entry:
 define <2 x i64> @test_mm_mask_permutex2var_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2q %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -4260,7 +4260,7 @@ entry:
 define <2 x i64> @test_mm_maskz_permutex2var_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2q %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4291,7 +4291,7 @@ entry:
 define <4 x i64> @test_mm256_mask_permutex2var_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2q %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -4312,7 +4312,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_permutex2var_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_permutex2var_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2q %ymm2, %ymm1, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -4334,7 +4334,7 @@ entry:
 define <2 x double> @test_mm_mask_fmadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_mask_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) + xmm2
 ; X86-NEXT:    retl
@@ -4355,7 +4355,7 @@ entry:
 define <2 x double> @test_mm_mask_fmsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_mask_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) - xmm2
 ; X86-NEXT:    retl
@@ -4377,7 +4377,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -4400,7 +4400,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231pd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -4424,7 +4424,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -4445,7 +4445,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -4467,7 +4467,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213pd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -4489,7 +4489,7 @@ entry:
 define <2 x double> @test_mm_maskz_fnmsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213pd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -4512,7 +4512,7 @@ entry:
 define <4 x double> @test_mm256_mask_fmadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_mask_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) + ymm2
 ; X86-NEXT:    retl
@@ -4533,7 +4533,7 @@ entry:
 define <4 x double> @test_mm256_mask_fmsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_mask_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) - ymm2
 ; X86-NEXT:    retl
@@ -4555,7 +4555,7 @@ entry:
 define <4 x double> @test_mm256_mask3_fmadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) + ymm2
 ; X86-NEXT:    vmovapd %ymm2, %ymm0
@@ -4578,7 +4578,7 @@ entry:
 define <4 x double> @test_mm256_mask3_fnmadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231pd {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) + ymm2
 ; X86-NEXT:    vmovapd %ymm2, %ymm0
@@ -4602,7 +4602,7 @@ entry:
 define <4 x double> @test_mm256_maskz_fmadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_maskz_fmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
 ; X86-NEXT:    retl
@@ -4623,7 +4623,7 @@ entry:
 define <4 x double> @test_mm256_maskz_fmsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_maskz_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) - ymm2
 ; X86-NEXT:    retl
@@ -4645,7 +4645,7 @@ entry:
 define <4 x double> @test_mm256_maskz_fnmadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_maskz_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213pd {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) + ymm2
 ; X86-NEXT:    retl
@@ -4667,7 +4667,7 @@ entry:
 define <4 x double> @test_mm256_maskz_fnmsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_maskz_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213pd {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) - ymm2
 ; X86-NEXT:    retl
@@ -4690,7 +4690,7 @@ entry:
 define <4 x float> @test_mm_mask_fmadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_mask_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) + xmm2
 ; X86-NEXT:    retl
@@ -4711,7 +4711,7 @@ entry:
 define <4 x float> @test_mm_mask_fmsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_mask_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) - xmm2
 ; X86-NEXT:    retl
@@ -4733,7 +4733,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -4756,7 +4756,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231ps {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -4780,7 +4780,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -4801,7 +4801,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -4823,7 +4823,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213ps {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
 ; X86-NEXT:    retl
@@ -4845,7 +4845,7 @@ entry:
 define <4 x float> @test_mm_maskz_fnmsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213ps {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
 ; X86-NEXT:    retl
@@ -4868,7 +4868,7 @@ entry:
 define <8 x float> @test_mm256_mask_fmadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_mask_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) + ymm2
 ; X86-NEXT:    retl
@@ -4888,7 +4888,7 @@ entry:
 define <8 x float> @test_mm256_mask_fmsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_mask_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) - ymm2
 ; X86-NEXT:    retl
@@ -4909,7 +4909,7 @@ entry:
 define <8 x float> @test_mm256_mask3_fmadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) + ymm2
 ; X86-NEXT:    vmovaps %ymm2, %ymm0
@@ -4931,7 +4931,7 @@ entry:
 define <8 x float> @test_mm256_mask3_fnmadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231ps {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) + ymm2
 ; X86-NEXT:    vmovaps %ymm2, %ymm0
@@ -4954,7 +4954,7 @@ entry:
 define <8 x float> @test_mm256_maskz_fmadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_maskz_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
 ; X86-NEXT:    retl
@@ -4974,7 +4974,7 @@ entry:
 define <8 x float> @test_mm256_maskz_fmsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_maskz_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) - ymm2
 ; X86-NEXT:    retl
@@ -4995,7 +4995,7 @@ entry:
 define <8 x float> @test_mm256_maskz_fnmadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_maskz_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213ps {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) + ymm2
 ; X86-NEXT:    retl
@@ -5016,7 +5016,7 @@ entry:
 define <8 x float> @test_mm256_maskz_fnmsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_maskz_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213ps {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) - ymm2
 ; X86-NEXT:    retl
@@ -5038,7 +5038,7 @@ entry:
 define <2 x double> @test_mm_mask_fmaddsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_mask_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
 ; X86-NEXT:    retl
@@ -5062,7 +5062,7 @@ entry:
 define <2 x double> @test_mm_mask_fmsubadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_mask_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) -/+ xmm2
 ; X86-NEXT:    retl
@@ -5086,7 +5086,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmaddsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -5112,7 +5112,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmaddsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
 ; X86-NEXT:    retl
@@ -5136,7 +5136,7 @@ entry:
 define <2 x double> @test_mm_maskz_fmsubadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) -/+ xmm2
 ; X86-NEXT:    retl
@@ -5160,7 +5160,7 @@ entry:
 define <4 x double> @test_mm256_mask_fmaddsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_mask_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
 ; X86-NEXT:    retl
@@ -5184,7 +5184,7 @@ entry:
 define <4 x double> @test_mm256_mask_fmsubadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_mask_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) -/+ ymm2
 ; X86-NEXT:    retl
@@ -5208,7 +5208,7 @@ entry:
 define <4 x double> @test_mm256_mask3_fmaddsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
 ; X86-NEXT:    vmovapd %ymm2, %ymm0
@@ -5234,7 +5234,7 @@ entry:
 define <4 x double> @test_mm256_maskz_fmaddsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_maskz_fmaddsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
 ; X86-NEXT:    retl
@@ -5258,7 +5258,7 @@ entry:
 define <4 x double> @test_mm256_maskz_fmsubadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_maskz_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) -/+ ymm2
 ; X86-NEXT:    retl
@@ -5282,7 +5282,7 @@ entry:
 define <4 x float> @test_mm_mask_fmaddsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_mask_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
 ; X86-NEXT:    retl
@@ -5306,7 +5306,7 @@ entry:
 define <4 x float> @test_mm_mask_fmsubadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_mask_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) -/+ xmm2
 ; X86-NEXT:    retl
@@ -5330,7 +5330,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmaddsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5356,7 +5356,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmaddsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
 ; X86-NEXT:    retl
@@ -5380,7 +5380,7 @@ entry:
 define <4 x float> @test_mm_maskz_fmsubadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_maskz_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) -/+ xmm2
 ; X86-NEXT:    retl
@@ -5404,7 +5404,7 @@ entry:
 define <8 x float> @test_mm256_mask_fmaddsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_mask_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
 ; X86-NEXT:    retl
@@ -5427,7 +5427,7 @@ entry:
 define <8 x float> @test_mm256_mask_fmsubadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_mask_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) -/+ ymm2
 ; X86-NEXT:    retl
@@ -5450,7 +5450,7 @@ entry:
 define <8 x float> @test_mm256_mask3_fmaddsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
 ; X86-NEXT:    vmovaps %ymm2, %ymm0
@@ -5475,7 +5475,7 @@ entry:
 define <8 x float> @test_mm256_maskz_fmaddsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_maskz_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
 ; X86-NEXT:    retl
@@ -5498,7 +5498,7 @@ entry:
 define <8 x float> @test_mm256_maskz_fmsubadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_maskz_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) -/+ ymm2
 ; X86-NEXT:    retl
@@ -5521,7 +5521,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -5545,7 +5545,7 @@ entry:
 define <4 x double> @test_mm256_mask3_fmsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) - ymm2
 ; X86-NEXT:    vmovapd %ymm2, %ymm0
@@ -5569,7 +5569,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5593,7 +5593,7 @@ entry:
 define <8 x float> @test_mm256_mask3_fmsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) - ymm2
 ; X86-NEXT:    vmovaps %ymm2, %ymm0
@@ -5616,7 +5616,7 @@ entry:
 define <2 x double> @test_mm_mask3_fmsubadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -5642,7 +5642,7 @@ entry:
 define <4 x double> @test_mm256_mask3_fmsubadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fmsubadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
 ; X86-NEXT:    vmovapd %ymm2, %ymm0
@@ -5668,7 +5668,7 @@ entry:
 define <4 x float> @test_mm_mask3_fmsubadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5694,7 +5694,7 @@ entry:
 define <8 x float> @test_mm256_mask3_fmsubadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
 ; X86-NEXT:    vmovaps %ymm2, %ymm0
@@ -5719,7 +5719,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_mask_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd132pd {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
 ; X86-NEXT:    retl
@@ -5741,7 +5741,7 @@ entry:
 define <4 x double> @test_mm256_mask_fnmadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_mask_fnmadd_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd132pd {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
 ; X86-NEXT:    retl
@@ -5763,7 +5763,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_mask_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd132ps {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
 ; X86-NEXT:    retl
@@ -5785,7 +5785,7 @@ entry:
 define <8 x float> @test_mm256_mask_fnmadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_mask_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd132ps {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
 ; X86-NEXT:    retl
@@ -5806,7 +5806,7 @@ entry:
 define <2 x double> @test_mm_mask_fnmsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
 ; X86-LABEL: test_mm_mask_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub132pd {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
 ; X86-NEXT:    retl
@@ -5829,7 +5829,7 @@ entry:
 define <2 x double> @test_mm_mask3_fnmsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231pd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
@@ -5854,7 +5854,7 @@ entry:
 define <4 x double> @test_mm256_mask_fnmsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
 ; X86-LABEL: test_mm256_mask_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub132pd {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
 ; X86-NEXT:    retl
@@ -5877,7 +5877,7 @@ entry:
 define <4 x double> @test_mm256_mask3_fnmsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fnmsub_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231pd {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
 ; X86-NEXT:    vmovapd %ymm2, %ymm0
@@ -5902,7 +5902,7 @@ entry:
 define <4 x float> @test_mm_mask_fnmsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
 ; X86-LABEL: test_mm_mask_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub132ps {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
 ; X86-NEXT:    retl
@@ -5925,7 +5925,7 @@ entry:
 define <4 x float> @test_mm_mask3_fnmsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm_mask3_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231ps {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
 ; X86-NEXT:    vmovaps %xmm2, %xmm0
@@ -5950,7 +5950,7 @@ entry:
 define <8 x float> @test_mm256_mask_fnmsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
 ; X86-LABEL: test_mm256_mask_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub132ps {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
 ; X86-NEXT:    retl
@@ -5972,7 +5972,7 @@ entry:
 define <8 x float> @test_mm256_mask3_fnmsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
 ; X86-LABEL: test_mm256_mask3_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231ps {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
 ; X86-NEXT:    vmovaps %ymm2, %ymm0
@@ -5997,7 +5997,7 @@ define <2 x double> @test_mm_mask_expandloadu_pd(<2 x double> %__W, i8 zeroext %
 ; X86-LABEL: test_mm_mask_expandloadu_pd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6018,7 +6018,7 @@ define <2 x double> @test_mm_maskz_expandloadu_pd(i8 zeroext %__U, ptr readonly
 ; X86-LABEL: test_mm_maskz_expandloadu_pd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6039,7 +6039,7 @@ define <4 x double> @test_mm256_mask_expandloadu_pd(<4 x double> %__W, i8 zeroex
 ; X86-LABEL: test_mm256_mask_expandloadu_pd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -6060,7 +6060,7 @@ define <4 x double> @test_mm256_maskz_expandloadu_pd(i8 zeroext %__U, ptr readon
 ; X86-LABEL: test_mm256_maskz_expandloadu_pd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6081,7 +6081,7 @@ define <2 x i64> @test_mm_mask_expandloadu_epi64(<2 x i64> %__W, i8 zeroext %__U
 ; X86-LABEL: test_mm_mask_expandloadu_epi64:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6102,7 +6102,7 @@ define <2 x i64> @test_mm_maskz_expandloadu_epi64(i8 zeroext %__U, ptr readonly
 ; X86-LABEL: test_mm_maskz_expandloadu_epi64:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6123,7 +6123,7 @@ define <4 x i64> @test_mm256_mask_expandloadu_epi64(<4 x i64> %__W, i8 zeroext %
 ; X86-LABEL: test_mm256_mask_expandloadu_epi64:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -6144,7 +6144,7 @@ define <4 x i64> @test_mm256_maskz_expandloadu_epi64(i8 zeroext %__U, ptr readon
 ; X86-LABEL: test_mm256_maskz_expandloadu_epi64:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6165,7 +6165,7 @@ define <4 x float> @test_mm_mask_expandloadu_ps(<4 x float> %__W, i8 zeroext %__
 ; X86-LABEL: test_mm_mask_expandloadu_ps:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6186,7 +6186,7 @@ define <4 x float> @test_mm_maskz_expandloadu_ps(i8 zeroext %__U, ptr readonly %
 ; X86-LABEL: test_mm_maskz_expandloadu_ps:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6207,7 +6207,7 @@ define <8 x float> @test_mm256_mask_expandloadu_ps(<8 x float> %__W, i8 zeroext
 ; X86-LABEL: test_mm256_mask_expandloadu_ps:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -6227,7 +6227,7 @@ define <8 x float> @test_mm256_maskz_expandloadu_ps(i8 zeroext %__U, ptr readonl
 ; X86-LABEL: test_mm256_maskz_expandloadu_ps:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6247,7 +6247,7 @@ define <2 x i64> @test_mm_mask_expandloadu_epi32(<2 x i64> %__W, i8 zeroext %__U
 ; X86-LABEL: test_mm_mask_expandloadu_epi32:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6270,7 +6270,7 @@ define <2 x i64> @test_mm_maskz_expandloadu_epi32(i8 zeroext %__U, ptr readonly
 ; X86-LABEL: test_mm_maskz_expandloadu_epi32:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6292,7 +6292,7 @@ define <4 x i64> @test_mm256_mask_expandloadu_epi32(<4 x i64> %__W, i8 zeroext %
 ; X86-LABEL: test_mm256_mask_expandloadu_epi32:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -6314,7 +6314,7 @@ define <4 x i64> @test_mm256_maskz_expandloadu_epi32(i8 zeroext %__U, ptr readon
 ; X86-LABEL: test_mm256_maskz_expandloadu_epi32:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6334,7 +6334,7 @@ entry:
 define void @test_mm_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_mask_compressstoreu_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcompresspd %xmm0, (%ecx) {%k1}
@@ -6355,7 +6355,7 @@ entry:
 define void @test_mm256_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_compressstoreu_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcompresspd %ymm0, (%ecx) {%k1}
@@ -6378,7 +6378,7 @@ entry:
 define void @test_mm_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_compressstoreu_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpcompressq %xmm0, (%ecx) {%k1}
@@ -6399,7 +6399,7 @@ entry:
 define void @test_mm256_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_compressstoreu_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpcompressq %ymm0, (%ecx) {%k1}
@@ -6422,7 +6422,7 @@ entry:
 define void @test_mm_mask_compressstoreu_ps(ptr %__P, i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_compressstoreu_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcompressps %xmm0, (%ecx) {%k1}
@@ -6443,7 +6443,7 @@ entry:
 define void @test_mm256_mask_compressstoreu_ps(ptr %__P, i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_mask_compressstoreu_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vcompressps %ymm0, (%ecx) {%k1}
@@ -6465,7 +6465,7 @@ entry:
 define void @test_mm_mask_compressstoreu_epi32(ptr %__P, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_compressstoreu_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpcompressd %xmm0, (%ecx) {%k1}
@@ -6487,7 +6487,7 @@ entry:
 define void @test_mm256_mask_compressstoreu_epi32(ptr %__P, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_compressstoreu_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpcompressd %ymm0, (%ecx) {%k1}
@@ -6516,7 +6516,7 @@ declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #8
 define <2 x double> @test_mm_mask_sqrt_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_mask_sqrt_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtpd %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6539,7 +6539,7 @@ declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
 define <2 x double> @test_mm_maskz_sqrt_pd(i8 zeroext %__U, <2 x double> %__A) {
 ; X86-LABEL: test_mm_maskz_sqrt_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtpd %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6560,7 +6560,7 @@ entry:
 define <4 x double> @test_mm256_mask_sqrt_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_mask_sqrt_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtpd %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -6583,7 +6583,7 @@ declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
 define <4 x double> @test_mm256_maskz_sqrt_pd(i8 zeroext %__U, <4 x double> %__A) {
 ; X86-LABEL: test_mm256_maskz_sqrt_pd:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtpd %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6604,7 +6604,7 @@ entry:
 define <4 x float> @test_mm_mask_sqrt_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_mask_sqrt_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtps %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6627,7 +6627,7 @@ declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
 define <4 x float> @test_mm_maskz_sqrt_ps(i8 zeroext %__U, <4 x float> %__A) {
 ; X86-LABEL: test_mm_maskz_sqrt_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtps %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6648,7 +6648,7 @@ entry:
 define <8 x float> @test_mm256_mask_sqrt_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_mask_sqrt_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtps %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -6668,7 +6668,7 @@ entry:
 define <8 x float> @test_mm256_maskz_sqrt_ps(i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_maskz_sqrt_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtps %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6702,7 +6702,7 @@ entry:
 define <2 x i64> @test_mm_mask_rol_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_rol_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprold $5, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6726,7 +6726,7 @@ entry:
 define <2 x i64> @test_mm_maskz_rol_epi32(i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_rol_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprold $5, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6761,7 +6761,7 @@ entry:
 define <4 x i64> @test_mm256_mask_rol_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_rol_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprold $5, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -6784,7 +6784,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_rol_epi32(i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_rol_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprold $5, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6816,7 +6816,7 @@ entry:
 define <2 x i64> @test_mm_mask_rol_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_rol_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolq $5, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6837,7 +6837,7 @@ entry:
 define <2 x i64> @test_mm_maskz_rol_epi64(i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_rol_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolq $5, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6868,7 +6868,7 @@ entry:
 define <4 x i64> @test_mm256_mask_rol_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_rol_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolq $5, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -6889,7 +6889,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_rol_epi64(i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_rol_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolq $5, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6923,7 +6923,7 @@ entry:
 define <2 x i64> @test_mm_mask_rolv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_rolv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -6948,7 +6948,7 @@ entry:
 define <2 x i64> @test_mm_maskz_rolv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_rolv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -6985,7 +6985,7 @@ entry:
 define <4 x i64> @test_mm256_mask_rolv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_rolv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvd %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -7009,7 +7009,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_rolv_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_rolv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7042,7 +7042,7 @@ entry:
 define <2 x i64> @test_mm_mask_rolv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_rolv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvq %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -7063,7 +7063,7 @@ entry:
 define <2 x i64> @test_mm_maskz_rolv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_rolv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7094,7 +7094,7 @@ entry:
 define <4 x i64> @test_mm256_mask_rolv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_rolv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvq %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -7115,7 +7115,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_rolv_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_rolv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7148,7 +7148,7 @@ entry:
 define <2 x i64> @test_mm_mask_ror_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_ror_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprord $5, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -7172,7 +7172,7 @@ entry:
 define <2 x i64> @test_mm_maskz_ror_epi32(i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_ror_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprord $5, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7207,7 +7207,7 @@ entry:
 define <4 x i64> @test_mm256_mask_ror_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_ror_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprord $5, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -7230,7 +7230,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_ror_epi32(i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_ror_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprord $5, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7262,7 +7262,7 @@ entry:
 define <2 x i64> @test_mm_mask_ror_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_mask_ror_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorq $5, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -7283,7 +7283,7 @@ entry:
 define <2 x i64> @test_mm_maskz_ror_epi64(i8 zeroext %__U, <2 x i64> %__A) {
 ; X86-LABEL: test_mm_maskz_ror_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorq $5, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7314,7 +7314,7 @@ entry:
 define <4 x i64> @test_mm256_mask_ror_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_mask_ror_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorq $5, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -7335,7 +7335,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_ror_epi64(i8 zeroext %__U, <4 x i64> %__A) {
 ; X86-LABEL: test_mm256_maskz_ror_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorq $5, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7369,7 +7369,7 @@ entry:
 define <2 x i64> @test_mm_mask_rorv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_rorv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvd %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -7394,7 +7394,7 @@ entry:
 define <2 x i64> @test_mm_maskz_rorv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_rorv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7431,7 +7431,7 @@ entry:
 define <4 x i64> @test_mm256_mask_rorv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_rorv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvd %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -7455,7 +7455,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_rorv_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_rorv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7488,7 +7488,7 @@ entry:
 define <2 x i64> @test_mm_mask_rorv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_mask_rorv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvq %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
@@ -7509,7 +7509,7 @@ entry:
 define <2 x i64> @test_mm_maskz_rorv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
 ; X86-LABEL: test_mm_maskz_rorv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
@@ -7540,7 +7540,7 @@ entry:
 define <4 x i64> @test_mm256_mask_rorv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_mask_rorv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvq %ymm2, %ymm1, %ymm0 {%k1}
 ; X86-NEXT:    retl
@@ -7561,7 +7561,7 @@ entry:
 define <4 x i64> @test_mm256_maskz_rorv_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
 ; X86-LABEL: test_mm256_maskz_rorv_epi64:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT:    retl

diff  --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index e7586e4f63158..3707c4861fe98 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -10753,7 +10753,7 @@ define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
 ; X86:       # %bb.0:
 ; X86-NEXT:    vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
 ; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
 ; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
@@ -10876,7 +10876,7 @@ define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2
 ; X86:       # %bb.0:
 ; X86-NEXT:    vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
 ; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
 ; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
 ; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]

diff  --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index 8ce36dd3c8ef5..a47dbe570fd52 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -119,7 +119,7 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
 ; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
 ; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
 ; SSE2-SSSE3-NEXT:    retq
 ;
@@ -211,7 +211,7 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
 ; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
 ; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
 ; SSE2-SSSE3-NEXT:    retq
 ;
@@ -384,7 +384,7 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
 ; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
 ; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
 ; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
 ; SSE2-SSSE3-NEXT:    retq
 ;

diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 04aff9a727c22..67b185daef7eb 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -348,7 +348,7 @@ declare i8 @llvm.bitreverse.i8(i8) readnone
 define i8 @test_bitreverse_i8(i8 %a) {
 ; X86-LABEL: test_bitreverse_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rolb $4, %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    andb $51, %cl
@@ -397,7 +397,7 @@ declare i4 @llvm.bitreverse.i4(i4) readnone
 define i4 @test_bitreverse_i4(i4 %a) {
 ; X86-LABEL: test_bitreverse_i4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    andb $15, %al
 ; X86-NEXT:    movl %ecx, %edx
@@ -528,7 +528,7 @@ define i4 @fold_i4() {
 define i8 @identity_i8(i8 %a) {
 ; X86-LABEL: identity_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: identity_i8:
@@ -539,7 +539,7 @@ define i8 @identity_i8(i8 %a) {
 ;
 ; X86XOP-LABEL: identity_i8:
 ; X86XOP:       # %bb.0:
-; X86XOP-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86XOP-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86XOP-NEXT:    retl
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   %c = call i8 @llvm.bitreverse.i8(i8 %b)

diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index 15c8ad471ea7f..17be1300be386 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -291,7 +291,7 @@ define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
 define i1 @andn_cmp_i8(i8 %x, i8 %y) {
 ; X86-LABEL: andn_cmp_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    notb %al
 ; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
 ; X86-NEXT:    sete %al

diff --git a/llvm/test/CodeGen/X86/bool-math.ll b/llvm/test/CodeGen/X86/bool-math.ll
index e5919bc1cec42..c0a7a5bd4fbd5 100644
--- a/llvm/test/CodeGen/X86/bool-math.ll
+++ b/llvm/test/CodeGen/X86/bool-math.ll
@@ -55,7 +55,7 @@ define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) {
 ;
 ; X32-LABEL: sub_zext_cmp_mask_narrower_result:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    orb $46, %al
 ; X32-NEXT:    retl
@@ -77,7 +77,7 @@ define i8 @add_zext_cmp_mask_same_size_result(i8 %x) {
 ;
 ; X32-LABEL: add_zext_cmp_mask_same_size_result:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    xorb $27, %al
 ; X32-NEXT:    retl
@@ -120,7 +120,7 @@ define i8 @add_zext_cmp_mask_narrower_result(i32 %x) {
 ;
 ; X32-LABEL: add_zext_cmp_mask_narrower_result:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    xorb $43, %al
 ; X32-NEXT:    retl
@@ -205,7 +205,7 @@ define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) {
 ;
 ; X32-LABEL: low_bit_select_constants_bigger_true_same_size_result:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    xorb $-29, %al
 ; X32-NEXT:    retl
@@ -246,7 +246,7 @@ define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) {
 ;
 ; X32-LABEL: low_bit_select_constants_bigger_true_narrower_result:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    xorb $41, %al
 ; X32-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/bool-vector.ll b/llvm/test/CodeGen/X86/bool-vector.ll
index 2cc7fa6ba864f..abac07032d83d 100644
--- a/llvm/test/CodeGen/X86/bool-vector.ll
+++ b/llvm/test/CodeGen/X86/bool-vector.ll
@@ -9,9 +9,9 @@
 define i32 @PR15215_bad(<4 x i32> %input) {
 ; X86-LABEL: PR15215_bad:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
 ; X86-NEXT:    addb %ah, %ah
 ; X86-NEXT:    andb $1, %cl

diff --git a/llvm/test/CodeGen/X86/brcond.ll b/llvm/test/CodeGen/X86/brcond.ll
index c933b69730806..c2a580a37c8db 100644
--- a/llvm/test/CodeGen/X86/brcond.ll
+++ b/llvm/test/CodeGen/X86/brcond.ll
@@ -6,7 +6,7 @@
 define i32 @test1(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-LABEL: test1:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK-NEXT:    xorb {{[0-9]+}}(%esp), %al
 ; CHECK-NEXT:    testb $64, %al
 ; CHECK-NEXT:    je LBB0_1

diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll
index 3a792abda1230..dc9d2bcce79e1 100644
--- a/llvm/test/CodeGen/X86/bt.ll
+++ b/llvm/test/CodeGen/X86/bt.ll
@@ -1148,7 +1148,7 @@ define void @demanded_i32(ptr nocapture readonly, ptr nocapture, i32) nounwind {
 define zeroext i1 @demanded_with_known_zeroes(i32 %bit, i32 %bits) {
 ; X86-LABEL: demanded_with_known_zeroes:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $2, %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl %al, %eax

diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll
index efd9d1105d975..b77b4ed274e87 100644
--- a/llvm/test/CodeGen/X86/btc_bts_btr.ll
+++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll
@@ -17,7 +17,7 @@ define i16 @btr_16(i16 %x, i16 %n) {
 ; X86-LABEL: btr_16:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    btrw %cx, %ax
 ; X86-NEXT:    retl
   %1 = shl i16 1, %n
@@ -36,7 +36,7 @@ define i16 @bts_16(i16 %x, i16 %n) {
 ;
 ; X86-LABEL: bts_16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    shll %cl, %eax
 ; X86-NEXT:    orw {{[0-9]+}}(%esp), %ax
@@ -57,7 +57,7 @@ define i16 @btc_16(i16 %x, i16 %n) {
 ;
 ; X86-LABEL: btc_16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    shll %cl, %eax
 ; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax
@@ -78,7 +78,7 @@ define i32 @btr_32(i32 %x, i32 %n) {
 ; X86-LABEL: btr_32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    btrl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = shl i32 1, %n
@@ -97,7 +97,7 @@ define i32 @bts_32(i32 %x, i32 %n) {
 ; X86-LABEL: bts_32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    btsl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = shl i32 1, %n
@@ -115,7 +115,7 @@ define i32 @btc_32(i32 %x, i32 %n) {
 ; X86-LABEL: btc_32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    btcl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = shl i32 1, %n
@@ -132,7 +132,7 @@ define i64 @btr_64(i64 %x, i64 %n) {
 ;
 ; X86-LABEL: btr_64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -163,7 +163,7 @@ define i64 @bts_64(i64 %x, i64 %n) {
 ;
 ; X86-LABEL: bts_64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -191,7 +191,7 @@ define i64 @btc_64(i64 %x, i64 %n) {
 ;
 ; X86-LABEL: btc_64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -224,7 +224,7 @@ define i16 @btr_16_mask(i16 %x, i16 %n) {
 ; X86-LABEL: btr_16_mask:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    btrw %cx, %ax
 ; X86-NEXT:    retl
   %1 = and i16 %n, 15
@@ -245,7 +245,7 @@ define i16 @bts_16_mask(i16 %x, i16 %n) {
 ;
 ; X86-LABEL: bts_16_mask:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    shll %cl, %eax
@@ -269,7 +269,7 @@ define i16 @btc_16_mask(i16 %x, i16 %n) {
 ;
 ; X86-LABEL: btc_16_mask:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    shll %cl, %eax
@@ -292,7 +292,7 @@ define i32 @btr_32_mask(i32 %x, i32 %n) {
 ; X86-LABEL: btr_32_mask:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    btrl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = and i32 %n, 31
@@ -312,7 +312,7 @@ define i32 @bts_32_mask(i32 %x, i32 %n) {
 ; X86-LABEL: bts_32_mask:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    btsl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = and i32 %n, 31
@@ -331,7 +331,7 @@ define i32 @btc_32_mask(i32 %x, i32 %n) {
 ; X86-LABEL: btc_32_mask:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    btcl %ecx, %eax
 ; X86-NEXT:    retl
   %1 = and i32 %n, 31
@@ -349,7 +349,7 @@ define i64 @btr_64_mask(i64 %x, i64 %n) {
 ;
 ; X86-LABEL: btr_64_mask:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -381,7 +381,7 @@ define i64 @bts_64_mask(i64 %x, i64 %n) {
 ;
 ; X86-LABEL: bts_64_mask:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -410,7 +410,7 @@ define i64 @btc_64_mask(i64 %x, i64 %n) {
 ;
 ; X86-LABEL: btc_64_mask:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -441,7 +441,7 @@ define i16 @btr_16_load(ptr %x, i16 %n) {
 ;
 ; X86-LABEL: btr_16_load:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzwl (%eax), %eax
 ; X86-NEXT:    btrw %cx, %ax
@@ -467,7 +467,7 @@ define i16 @bts_16_load(ptr %x, i16 %n) {
 ; X86-LABEL: bts_16_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    shll %cl, %eax
 ; X86-NEXT:    orw (%edx), %ax
@@ -493,7 +493,7 @@ define i16 @btc_16_load(ptr %x, i16 %n) {
 ; X86-LABEL: btc_16_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    shll %cl, %eax
 ; X86-NEXT:    xorw (%edx), %ax
@@ -514,7 +514,7 @@ define i32 @btr_32_load(ptr %x, i32 %n) {
 ;
 ; X86-LABEL: btr_32_load:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    btrl %ecx, %eax
@@ -535,7 +535,7 @@ define i32 @bts_32_load(ptr %x, i32 %n) {
 ;
 ; X86-LABEL: bts_32_load:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    btsl %ecx, %eax
@@ -555,7 +555,7 @@ define i32 @btc_32_load(ptr %x, i32 %n) {
 ;
 ; X86-LABEL: btc_32_load:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    btcl %ecx, %eax
@@ -579,7 +579,7 @@ define i64 @btr_64_load(ptr %x, i64 %n) {
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -617,7 +617,7 @@ define i64 @bts_64_load(ptr %x, i64 %n) {
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -652,7 +652,7 @@ define i64 @btc_64_load(ptr %x, i64 %n) {
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    shldl %cl, %eax, %edx
@@ -691,7 +691,7 @@ define void @btr_16_dont_fold(ptr %x, i16 %n) {
 ; X86-LABEL: btr_16_dont_fold:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movw $-2, %dx
 ; X86-NEXT:    rolw %cl, %dx
 ; X86-NEXT:    andw %dx, (%eax)
@@ -717,7 +717,7 @@ define void @bts_16_dont_fold(ptr %x, i16 %n) {
 ; X86-LABEL: bts_16_dont_fold:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %edx
 ; X86-NEXT:    shll %cl, %edx
 ; X86-NEXT:    orw %dx, (%eax)
@@ -742,7 +742,7 @@ define void @btc_16_dont_fold(ptr %x, i16 %n) {
 ; X86-LABEL: btc_16_dont_fold:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %edx
 ; X86-NEXT:    shll %cl, %edx
 ; X86-NEXT:    xorw %dx, (%eax)
@@ -767,7 +767,7 @@ define void @btr_32_dont_fold(ptr %x, i32 %n) {
 ; X86-LABEL: btr_32_dont_fold:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $-2, %edx
 ; X86-NEXT:    roll %cl, %edx
 ; X86-NEXT:    andl %edx, (%eax)
@@ -793,7 +793,7 @@ define void @bts_32_dont_fold(ptr %x, i32 %n) {
 ; X86-LABEL: bts_32_dont_fold:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %edx
 ; X86-NEXT:    shll %cl, %edx
 ; X86-NEXT:    orl %edx, (%eax)
@@ -818,7 +818,7 @@ define void @btc_32_dont_fold(ptr %x, i32 %n) {
 ; X86-LABEL: btc_32_dont_fold:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %edx
 ; X86-NEXT:    shll %cl, %edx
 ; X86-NEXT:    xorl %edx, (%eax)
@@ -846,7 +846,7 @@ define void @btr_64_dont_fold(ptr %x, i64 %n) {
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %edx
 ; X86-NEXT:    xorl %esi, %esi
 ; X86-NEXT:    shldl %cl, %edx, %esi
@@ -888,7 +888,7 @@ define void @bts_64_dont_fold(ptr %x, i64 %n) {
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %edx
 ; X86-NEXT:    xorl %esi, %esi
 ; X86-NEXT:    shldl %cl, %edx, %esi
@@ -927,7 +927,7 @@ define void @btc_64_dont_fold(ptr %x, i64 %n) {
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %edx
 ; X86-NEXT:    xorl %esi, %esi
 ; X86-NEXT:    shldl %cl, %edx, %esi
@@ -960,7 +960,7 @@ define i32 @btr_32_mask_zeros(i32 %x, i32 %n) {
 ;
 ; X86-LABEL: btr_32_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $2, %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    btrl %ecx, %eax
@@ -983,7 +983,7 @@ define i32 @bts_32_mask_zeros(i32 %x, i32 %n) {
 ;
 ; X86-LABEL: bts_32_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $2, %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    btsl %ecx, %eax
@@ -1005,7 +1005,7 @@ define i32 @btc_32_mask_zeros(i32 %x, i32 %n) {
 ;
 ; X86-LABEL: btc_32_mask_zeros:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $2, %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    btcl %ecx, %eax

diff --git a/llvm/test/CodeGen/X86/byval5.ll b/llvm/test/CodeGen/X86/byval5.ll
index 28deafcd982f5..2b929c596246b 100644
--- a/llvm/test/CodeGen/X86/byval5.ll
+++ b/llvm/test/CodeGen/X86/byval5.ll
@@ -38,14 +38,14 @@ define void @g(i8 signext  %a1, i8 signext  %a2, i8 signext  %a3, i8 signext  %a
 ; X64-NEXT:    movq %rsp, %rdi
 ; X64-NEXT:    movq %rbx, %rsi
 ; X64-NEXT:    rep;movsq (%rsi), %es:(%rdi)
-; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; X64-NEXT:    movb %al, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    callq f@PLT
 ; X64-NEXT:    movl $16, %ecx
 ; X64-NEXT:    movq %rsp, %rdi
 ; X64-NEXT:    movq %rbx, %rsi
 ; X64-NEXT:    rep;movsq (%rsi), %es:(%rdi)
-; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; X64-NEXT:    movb %al, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    callq f@PLT
 ; X64-NEXT:    addq $272, %rsp # imm = 0x110
@@ -61,9 +61,9 @@ define void @g(i8 signext  %a1, i8 signext  %a2, i8 signext  %a3, i8 signext  %a
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $272, %esp # imm = 0x110
-; X86-NEXT:    movzbl 28(%ebp), %eax
-; X86-NEXT:    movzbl 24(%ebp), %ecx
-; X86-NEXT:    movzbl 20(%ebp), %edx
+; X86-NEXT:    movb 28(%ebp), %al
+; X86-NEXT:    movb 24(%ebp), %cl
+; X86-NEXT:    movb 20(%ebp), %dl
 ; X86-NEXT:    movb 16(%ebp), %ah
 ; X86-NEXT:    movb 12(%ebp), %ch
 ; X86-NEXT:    movb 8(%ebp), %dh
@@ -78,14 +78,14 @@ define void @g(i8 signext  %a1, i8 signext  %a2, i8 signext  %a3, i8 signext  %a
 ; X86-NEXT:    movl %esp, %edi
 ; X86-NEXT:    movl %ebx, %esi
 ; X86-NEXT:    rep;movsl (%esi), %es:(%edi)
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movb %al, {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll f@PLT
 ; X86-NEXT:    movl $32, %ecx
 ; X86-NEXT:    movl %esp, %edi
 ; X86-NEXT:    movl %ebx, %esi
 ; X86-NEXT:    rep;movsl (%esi), %es:(%edi)
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movb %al, {{[0-9]+}}(%esp)
 ; X86-NEXT:    calll f@PLT
 ; X86-NEXT:    leal -12(%ebp), %esp

diff --git a/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll b/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll
index dea3d04eb74d5..17c95cf0cf6fd 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll
@@ -26,7 +26,7 @@ define i64 @early_ioremap_pmd(i64 %addr) {
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movabsq $9223372036854771712, %rdx # imm = 0x7FFFFFFFFFFFF000
 ; CHECK-NEXT:    andq %rax, %rdx
-; CHECK-NEXT:    movzbl pgdir_shift(%rip), %eax
+; CHECK-NEXT:    movb pgdir_shift(%rip), %al
 ; CHECK-NEXT:    movq page_offset_base(%rip), %rcx
 ; CHECK-NEXT:    shrxq %rax, %rdi, %rax
 ; CHECK-NEXT:    addq %rcx, %rdx

diff --git a/llvm/test/CodeGen/X86/clear-highbits.ll b/llvm/test/CodeGen/X86/clear-highbits.ll
index 755b1094234fd..798cd39463345 100644
--- a/llvm/test/CodeGen/X86/clear-highbits.ll
+++ b/llvm/test/CodeGen/X86/clear-highbits.ll
@@ -24,8 +24,8 @@
 define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind {
 ; X86-LABEL: clear_highbits8_c0:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    retl
@@ -47,9 +47,9 @@ define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind {
 define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind {
 ; X86-LABEL: clear_highbits8_c2_load:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    retl
@@ -57,7 +57,7 @@ define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind {
 ; X64-LABEL: clear_highbits8_c2_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    movb (%rdi), %al
 ; X64-NEXT:    shlb %cl, %al
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrb %cl, %al
@@ -71,8 +71,8 @@ define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind {
 define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind {
 ; X86-LABEL: clear_highbits8_c4_commutative:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    retl
@@ -98,7 +98,7 @@ define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind {
 define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits16_c0:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    movzwl %ax, %eax
@@ -108,7 +108,7 @@ define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_highbits16_c0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movzwl %cx, %ecx
 ; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
@@ -140,7 +140,7 @@ define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind {
 define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    movzwl %ax, %eax
@@ -150,7 +150,7 @@ define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_highbits16_c1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movzwl %cx, %ecx
 ; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
@@ -183,7 +183,7 @@ define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind {
 define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits16_c2_load:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -194,7 +194,7 @@ define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_highbits16_c2_load:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
@@ -231,7 +231,7 @@ define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind {
 define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -242,7 +242,7 @@ define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind
 ;
 ; X86-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
@@ -280,7 +280,7 @@ define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind
 define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    movzwl %ax, %eax
@@ -290,7 +290,7 @@ define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind
 ;
 ; X86-BMI2-LABEL: clear_highbits16_c4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movzwl %cx, %ecx
 ; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
@@ -326,7 +326,7 @@ define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind
 define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits32_c0:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
@@ -334,7 +334,7 @@ define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_highbits32_c0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl $32, %ecx
 ; X86-BMI2-NEXT:    subl %eax, %ecx
 ; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
@@ -363,7 +363,7 @@ define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind {
 define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
@@ -371,7 +371,7 @@ define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_highbits32_c1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl $32, %ecx
 ; X86-BMI2-NEXT:    subl %eax, %ecx
 ; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
@@ -401,7 +401,7 @@ define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind {
 define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits32_c2_load:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movl (%eax), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -411,7 +411,7 @@ define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind {
 ; X86-BMI2-LABEL: clear_highbits32_c2_load:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $32, %edx
 ; X86-BMI2-NEXT:    subl %ecx, %edx
 ; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
@@ -441,7 +441,7 @@ define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind {
 define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movl (%eax), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -451,7 +451,7 @@ define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind
 ; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $32, %edx
 ; X86-BMI2-NEXT:    subl %ecx, %edx
 ; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
@@ -482,7 +482,7 @@ define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind
 define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
@@ -490,7 +490,7 @@ define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind
 ;
 ; X86-BMI2-LABEL: clear_highbits32_c4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl $32, %ecx
 ; X86-BMI2-NEXT:    subl %eax, %ecx
 ; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
@@ -524,7 +524,7 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
 ; X86-BASELINE-LABEL: clear_highbits64_c0:
 ; X86-BASELINE:       # %bb.0:
 ; X86-BASELINE-NEXT:    pushl %esi
-; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BASELINE-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BASELINE-NEXT:    movl $-1, %eax
 ; X86-BASELINE-NEXT:    movl $-1, %esi
 ; X86-BASELINE-NEXT:    shrl %cl, %esi
@@ -545,7 +545,7 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
 ; X86-BMI1-LABEL: clear_highbits64_c0:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %esi
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
@@ -561,7 +561,7 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
 ; X86-BMI2-LABEL: clear_highbits64_c0:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
@@ -597,7 +597,7 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
 ; X86-BASELINE-LABEL: clear_highbits64_c1_indexzext:
 ; X86-BASELINE:       # %bb.0:
 ; X86-BASELINE-NEXT:    pushl %esi
-; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BASELINE-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BASELINE-NEXT:    movl $-1, %eax
 ; X86-BASELINE-NEXT:    movl $-1, %esi
 ; X86-BASELINE-NEXT:    shrl %cl, %esi
@@ -618,7 +618,7 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
 ; X86-BMI1-LABEL: clear_highbits64_c1_indexzext:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %esi
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
@@ -634,7 +634,7 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
 ; X86-BMI2-LABEL: clear_highbits64_c1_indexzext:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
@@ -673,7 +673,7 @@ define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind {
 ; X86-BASELINE-NEXT:    pushl %edi
 ; X86-BASELINE-NEXT:    pushl %esi
 ; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BASELINE-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BASELINE-NEXT:    movl $-1, %eax
 ; X86-BASELINE-NEXT:    movl $-1, %edi
 ; X86-BASELINE-NEXT:    shrl %cl, %edi
@@ -697,7 +697,7 @@ define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind {
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %edi
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
@@ -716,7 +716,7 @@ define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind {
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ebx, %eax, %esi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
@@ -756,7 +756,7 @@ define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind
 ; X86-BASELINE-NEXT:    pushl %edi
 ; X86-BASELINE-NEXT:    pushl %esi
 ; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BASELINE-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BASELINE-NEXT:    movl $-1, %eax
 ; X86-BASELINE-NEXT:    movl $-1, %edi
 ; X86-BASELINE-NEXT:    shrl %cl, %edi
@@ -780,7 +780,7 @@ define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %edi
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
@@ -799,7 +799,7 @@ define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ebx, %eax, %esi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
@@ -838,7 +838,7 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind
 ; X86-BASELINE-LABEL: clear_highbits64_c4_commutative:
 ; X86-BASELINE:       # %bb.0:
 ; X86-BASELINE-NEXT:    pushl %esi
-; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BASELINE-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BASELINE-NEXT:    movl $-1, %eax
 ; X86-BASELINE-NEXT:    movl $-1, %esi
 ; X86-BASELINE-NEXT:    shrl %cl, %esi
@@ -859,7 +859,7 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind
 ; X86-BMI1-LABEL: clear_highbits64_c4_commutative:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %esi
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
@@ -875,7 +875,7 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind
 ; X86-BMI2-LABEL: clear_highbits64_c4_commutative:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
@@ -915,7 +915,7 @@ define i32 @oneuse32_c(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
 ; X86-NOBMI2-LABEL: oneuse32_c:
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    movl %eax, (%edx)
@@ -925,7 +925,7 @@ define i32 @oneuse32_c(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
 ; X86-BMI2-LABEL: oneuse32_c:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shrxl %eax, %edx, %eax
 ; X86-BMI2-NEXT:    movl %eax, (%ecx)
@@ -960,7 +960,7 @@ define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
 ; X86-BASELINE:       # %bb.0:
 ; X86-BASELINE-NEXT:    pushl %esi
 ; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BASELINE-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BASELINE-NEXT:    movl $-1, %eax
 ; X86-BASELINE-NEXT:    movl $-1, %edx
 ; X86-BASELINE-NEXT:    shrl %cl, %edx
@@ -982,7 +982,7 @@ define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    movl $-1, %edi
 ; X86-BMI1-NEXT:    shrl %cl, %edi
@@ -1003,7 +1003,7 @@ define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ebx, %eax, %esi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
@@ -1045,7 +1045,7 @@ define i32 @oneuse32_d(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
 ; X86-NOBMI2-LABEL: oneuse32_d:
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    movl %eax, (%edx)
@@ -1055,7 +1055,7 @@ define i32 @oneuse32_d(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
 ; X86-BMI2-LABEL: oneuse32_d:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shlxl %ecx, {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl %edx, (%eax)
 ; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
@@ -1089,7 +1089,7 @@ define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
 ; X86-BASELINE-NEXT:    pushl %ebx
 ; X86-BASELINE-NEXT:    pushl %edi
 ; X86-BASELINE-NEXT:    pushl %esi
-; X86-BASELINE-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BASELINE-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BASELINE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BASELINE-NEXT:    movl %edx, %edi
@@ -1131,7 +1131,7 @@ define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl %edx, %eax
@@ -1161,7 +1161,7 @@ define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %esi

diff --git a/llvm/test/CodeGen/X86/clear-lowbits.ll b/llvm/test/CodeGen/X86/clear-lowbits.ll
index 49ea2d0f1ed7a..1e28809cd43eb 100644
--- a/llvm/test/CodeGen/X86/clear-lowbits.ll
+++ b/llvm/test/CodeGen/X86/clear-lowbits.ll
@@ -26,8 +26,8 @@
 define i8 @clear_lowbits8_c0(i8 %val, i8 %numlowbits) nounwind {
 ; X86-LABEL: clear_lowbits8_c0:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    retl
@@ -49,9 +49,9 @@ define i8 @clear_lowbits8_c0(i8 %val, i8 %numlowbits) nounwind {
 define i8 @clear_lowbits8_c2_load(ptr %w, i8 %numlowbits) nounwind {
 ; X86-LABEL: clear_lowbits8_c2_load:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    retl
@@ -59,7 +59,7 @@ define i8 @clear_lowbits8_c2_load(ptr %w, i8 %numlowbits) nounwind {
 ; X64-LABEL: clear_lowbits8_c2_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %ecx
-; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    movb (%rdi), %al
 ; X64-NEXT:    shrb %cl, %al
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shlb %cl, %al
@@ -73,8 +73,8 @@ define i8 @clear_lowbits8_c2_load(ptr %w, i8 %numlowbits) nounwind {
 define i8 @clear_lowbits8_c4_commutative(i8 %val, i8 %numlowbits) nounwind {
 ; X86-LABEL: clear_lowbits8_c4_commutative:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    retl
@@ -98,7 +98,7 @@ define i8 @clear_lowbits8_c4_commutative(i8 %val, i8 %numlowbits) nounwind {
 define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits16_c0:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -108,7 +108,7 @@ define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: clear_lowbits16_c0:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -139,7 +139,7 @@ define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind {
 define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits16_c1_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -149,7 +149,7 @@ define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: clear_lowbits16_c1_indexzext:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -181,7 +181,7 @@ define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind {
 define i16 @clear_lowbits16_c2_load(ptr %w, i16 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits16_c2_load:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
@@ -191,7 +191,7 @@ define i16 @clear_lowbits16_c2_load(ptr %w, i16 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_lowbits16_c2_load:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
 ; X86-BMI2-NEXT:    shrxl %eax, %ecx, %ecx
@@ -225,7 +225,7 @@ define i16 @clear_lowbits16_c2_load(ptr %w, i16 %numlowbits) nounwind {
 define i16 @clear_lowbits16_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
@@ -235,7 +235,7 @@ define i16 @clear_lowbits16_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_lowbits16_c3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
 ; X86-BMI2-NEXT:    shrxl %eax, %ecx, %ecx
@@ -270,7 +270,7 @@ define i16 @clear_lowbits16_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits16_c4_commutative:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -280,7 +280,7 @@ define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: clear_lowbits16_c4_commutative:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -313,7 +313,7 @@ define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind {
 define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits32_c0:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -321,7 +321,7 @@ define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_lowbits32_c0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -348,7 +348,7 @@ define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind {
 define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits32_c1_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -356,7 +356,7 @@ define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_lowbits32_c1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -384,7 +384,7 @@ define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 define i32 @clear_lowbits32_c2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits32_c2_load:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movl (%eax), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
@@ -394,7 +394,7 @@ define i32 @clear_lowbits32_c2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: clear_lowbits32_c2_load:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    retl
@@ -422,7 +422,7 @@ define i32 @clear_lowbits32_c2_load(ptr %w, i32 %numlowbits) nounwind {
 define i32 @clear_lowbits32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movl (%eax), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
@@ -432,7 +432,7 @@ define i32 @clear_lowbits32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: clear_lowbits32_c3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    retl
@@ -461,7 +461,7 @@ define i32 @clear_lowbits32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits32_c4_commutative:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -469,7 +469,7 @@ define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_lowbits32_c4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -498,7 +498,7 @@ define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
 define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits64_c0:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -514,7 +514,7 @@ define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_lowbits64_c0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
@@ -549,7 +549,7 @@ define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind {
 define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -565,7 +565,7 @@ define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
@@ -604,7 +604,7 @@ define i64 @clear_lowbits64_c2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    pushl %esi
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -623,7 +623,7 @@ define i64 @clear_lowbits64_c2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %bl
@@ -662,7 +662,7 @@ define i64 @clear_lowbits64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    pushl %esi
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -681,7 +681,7 @@ define i64 @clear_lowbits64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %bl
@@ -720,7 +720,7 @@ define i64 @clear_lowbits64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -736,7 +736,7 @@ define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: clear_lowbits64_c4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
@@ -777,7 +777,7 @@ define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 define i8 @clear_lowbits8_ic0(i8 %val, i8 %numlowbits) nounwind {
 ; X86-LABEL: clear_lowbits8_ic0:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movb $8, %cl
 ; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shrb %cl, %al
@@ -803,7 +803,7 @@ define i8 @clear_lowbits8_ic2_load(ptr %w, i8 %numlowbits) nounwind {
 ; X86-LABEL: clear_lowbits8_ic2_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    movb $8, %cl
 ; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shrb %cl, %al
@@ -812,7 +812,7 @@ define i8 @clear_lowbits8_ic2_load(ptr %w, i8 %numlowbits) nounwind {
 ;
 ; X64-LABEL: clear_lowbits8_ic2_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    movb (%rdi), %al
 ; X64-NEXT:    movb $8, %cl
 ; X64-NEXT:    subb %sil, %cl
 ; X64-NEXT:    shrb %cl, %al
@@ -828,7 +828,7 @@ define i8 @clear_lowbits8_ic2_load(ptr %w, i8 %numlowbits) nounwind {
 define i8 @clear_lowbits8_ic4_commutative(i8 %val, i8 %numlowbits) nounwind {
 ; X86-LABEL: clear_lowbits8_ic4_commutative:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movb $8, %cl
 ; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shrb %cl, %al
@@ -1613,7 +1613,7 @@ define i32 @oneuse32_c(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
 ; X86-NOBMI2-LABEL: oneuse32_c:
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    movl %eax, (%edx)
@@ -1623,7 +1623,7 @@ define i32 @oneuse32_c(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI2-LABEL: oneuse32_c:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %eax, %edx, %eax
 ; X86-BMI2-NEXT:    movl %eax, (%ecx)
@@ -1659,7 +1659,7 @@ define i64 @oneuse64(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
 ; X86-NOBMI2-NEXT:    pushl %edi
 ; X86-NOBMI2-NEXT:    pushl %esi
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %edi
 ; X86-NOBMI2-NEXT:    shll %cl, %edi
@@ -1685,7 +1685,7 @@ define i64 @oneuse64(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ebx, %edx, %esi
 ; X86-BMI2-NEXT:    xorl %eax, %eax

diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll
index b66902fff3f1b..fe71e1f06d31b 100644
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -302,7 +302,7 @@ define i64 @ctlz_i64(i64 %x) {
 define i8 @ctlz_i8_zero_test(i8 %n) {
 ; X86-LABEL: ctlz_i8_zero_test:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    testb %al, %al
 ; X86-NEXT:    je .LBB8_1
 ; X86-NEXT:  # %bb.2: # %cond.false
@@ -512,7 +512,7 @@ define i64 @ctlz_i64_zero_test(i64 %n) {
 define i8 @cttz_i8_zero_test(i8 %n) {
 ; X86-LABEL: cttz_i8_zero_test:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    testb %al, %al
 ; X86-NEXT:    je .LBB12_1
 ; X86-NEXT:  # %bb.2: # %cond.false
@@ -819,7 +819,7 @@ define i32 @ctlz_bsr_zero_test(i32 %n) {
 define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X86-LABEL: cttz_i8_knownbits:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    orb $2, %al
 ; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    bsfl %eax, %eax
@@ -836,7 +836,7 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ;
 ; X86-CLZ-LABEL: cttz_i8_knownbits:
 ; X86-CLZ:       # %bb.0:
-; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-CLZ-NEXT:    orb $2, %al
 ; X86-CLZ-NEXT:    movzbl %al, %eax
 ; X86-CLZ-NEXT:    tzcntl %eax, %eax
@@ -859,7 +859,7 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X86-LABEL: ctlz_i8_knownbits:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    orb $64, %al
 ; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    bsrl %eax, %eax
@@ -878,7 +878,7 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ;
 ; X86-CLZ-LABEL: ctlz_i8_knownbits:
 ; X86-CLZ:       # %bb.0:
-; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-CLZ-NEXT:    orb $64, %al
 ; X86-CLZ-NEXT:    movzbl %al, %eax
 ; X86-CLZ-NEXT:    lzcntl %eax, %eax

diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll
index 94df5fa6d96fc..2296ac5e4604a 100644
--- a/llvm/test/CodeGen/X86/cmov.ll
+++ b/llvm/test/CodeGen/X86/cmov.ll
@@ -85,11 +85,11 @@ define i1 @test4() nounwind {
 ; CHECK-NEXT:    xorb $1, %cl
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    sarl %cl, %edx
-; CHECK-NEXT:    movzbl g_96(%rip), %eax
+; CHECK-NEXT:    movb g_96(%rip), %al
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je .LBB3_2
 ; CHECK-NEXT:  # %bb.1: # %bb.i.i.i
-; CHECK-NEXT:    movzbl g_100(%rip), %ecx
+; CHECK-NEXT:    movb g_100(%rip), %cl
 ; CHECK-NEXT:  .LBB3_2: # %func_4.exit.i
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    testb %dl, %dl
@@ -102,7 +102,7 @@ define i1 @test4() nounwind {
 ; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne .LBB3_5
 ; CHECK-NEXT:  # %bb.4: # %bb.i.i
-; CHECK-NEXT:    movzbl g_100(%rip), %ecx
+; CHECK-NEXT:    movb g_100(%rip), %cl
 ; CHECK-NEXT:    xorl %ebx, %ebx
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:  .LBB3_5: # %func_1.exit

diff --git a/llvm/test/CodeGen/X86/cmovcmov.ll b/llvm/test/CodeGen/X86/cmovcmov.ll
index ab863dee69010..8407df67c1645 100644
--- a/llvm/test/CodeGen/X86/cmovcmov.ll
+++ b/llvm/test/CodeGen/X86/cmovcmov.ll
@@ -339,7 +339,7 @@ define dso_local void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) nounwi
 ; NOCMOV-NEXT:    movb %al, g8
 ; NOCMOV-NEXT:    retl
 ; NOCMOV-NEXT:  .LBB7_1: # %entry
-; NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; NOCMOV-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; NOCMOV-NEXT:    jg .LBB7_4
 ; NOCMOV-NEXT:  .LBB7_3: # %entry
 ; NOCMOV-NEXT:    movl %ecx, %eax

diff --git a/llvm/test/CodeGen/X86/combine-andintoload.ll b/llvm/test/CodeGen/X86/combine-andintoload.ll
index caca66adc111c..f81442330639f 100644
--- a/llvm/test/CodeGen/X86/combine-andintoload.ll
+++ b/llvm/test/CodeGen/X86/combine-andintoload.ll
@@ -10,7 +10,7 @@ define zeroext i1 @bigger(ptr nocapture readonly %c, ptr nocapture readonly %e,
 ; CHECK-NEXT:    movl $5, %r8d
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    shll %cl, %r8d
-; CHECK-NEXT:    movzbl (%rsi,%rdx), %eax
+; CHECK-NEXT:    movb (%rsi,%rdx), %al
 ; CHECK-NEXT:    xorb (%rdi,%rdx), %al
 ; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    andl %r8d, %eax

diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
index 017dc960bd171..e7b14d48b61da 100644
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -176,13 +176,13 @@ define void @demand_one_loaded_byte(ptr %xp, ptr %yp) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl 4(%ecx), %ecx
+; X86-NEXT:    movb 4(%ecx), %cl
 ; X86-NEXT:    movb %cl, (%eax)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: demand_one_loaded_byte:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl 4(%rdi), %eax
+; X64-NEXT:    movb 4(%rdi), %al
 ; X64-NEXT:    movb %al, (%rsi)
 ; X64-NEXT:    retq
   %x = load i64, ptr %xp, align 8

diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
index 10787dce3e7e2..8f4a716d71cae 100644
--- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
@@ -13,7 +13,7 @@
 define i8 @test_i8_7_mask_lshr_1(i8 %a0) {
 ; X86-LABEL: test_i8_7_mask_lshr_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $6, %al
 ; X86-NEXT:    shrb %al
 ; X86-NEXT:    retl
@@ -33,7 +33,7 @@ define i8 @test_i8_7_mask_lshr_1(i8 %a0) {
 define i8 @test_i8_28_mask_lshr_1(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_lshr_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    shrb %al
 ; X86-NEXT:    retl
@@ -52,7 +52,7 @@ define i8 @test_i8_28_mask_lshr_1(i8 %a0) {
 define i8 @test_i8_28_mask_lshr_2(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_lshr_2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    shrb $2, %al
 ; X86-NEXT:    retl
@@ -71,7 +71,7 @@ define i8 @test_i8_28_mask_lshr_2(i8 %a0) {
 define i8 @test_i8_28_mask_lshr_3(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_lshr_3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $24, %al
 ; X86-NEXT:    shrb $3, %al
 ; X86-NEXT:    retl
@@ -90,7 +90,7 @@ define i8 @test_i8_28_mask_lshr_3(i8 %a0) {
 define i8 @test_i8_28_mask_lshr_4(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_lshr_4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $16, %al
 ; X86-NEXT:    shrb $4, %al
 ; X86-NEXT:    retl
@@ -110,7 +110,7 @@ define i8 @test_i8_28_mask_lshr_4(i8 %a0) {
 define i8 @test_i8_224_mask_lshr_1(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_lshr_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $-32, %al
 ; X86-NEXT:    shrb %al
 ; X86-NEXT:    retl
@@ -129,7 +129,7 @@ define i8 @test_i8_224_mask_lshr_1(i8 %a0) {
 define i8 @test_i8_224_mask_lshr_4(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_lshr_4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $-32, %al
 ; X86-NEXT:    shrb $4, %al
 ; X86-NEXT:    retl
@@ -148,7 +148,7 @@ define i8 @test_i8_224_mask_lshr_4(i8 %a0) {
 define i8 @test_i8_224_mask_lshr_5(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_lshr_5:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb $5, %al
 ; X86-NEXT:    retl
 ;
@@ -165,7 +165,7 @@ define i8 @test_i8_224_mask_lshr_5(i8 %a0) {
 define i8 @test_i8_224_mask_lshr_6(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_lshr_6:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb $6, %al
 ; X86-NEXT:    retl
 ;
@@ -185,7 +185,7 @@ define i8 @test_i8_224_mask_lshr_6(i8 %a0) {
 define i8 @test_i8_7_mask_ashr_1(i8 %a0) {
 ; X86-LABEL: test_i8_7_mask_ashr_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $6, %al
 ; X86-NEXT:    shrb %al
 ; X86-NEXT:    retl
@@ -205,7 +205,7 @@ define i8 @test_i8_7_mask_ashr_1(i8 %a0) {
 define i8 @test_i8_28_mask_ashr_1(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_ashr_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    shrb %al
 ; X86-NEXT:    retl
@@ -224,7 +224,7 @@ define i8 @test_i8_28_mask_ashr_1(i8 %a0) {
 define i8 @test_i8_28_mask_ashr_2(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_ashr_2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    shrb $2, %al
 ; X86-NEXT:    retl
@@ -243,7 +243,7 @@ define i8 @test_i8_28_mask_ashr_2(i8 %a0) {
 define i8 @test_i8_28_mask_ashr_3(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_ashr_3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $24, %al
 ; X86-NEXT:    shrb $3, %al
 ; X86-NEXT:    retl
@@ -262,7 +262,7 @@ define i8 @test_i8_28_mask_ashr_3(i8 %a0) {
 define i8 @test_i8_28_mask_ashr_4(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_ashr_4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $16, %al
 ; X86-NEXT:    shrb $4, %al
 ; X86-NEXT:    retl
@@ -282,7 +282,7 @@ define i8 @test_i8_28_mask_ashr_4(i8 %a0) {
 define i8 @test_i8_224_mask_ashr_1(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_ashr_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $-32, %al
 ; X86-NEXT:    sarb %al
 ; X86-NEXT:    retl
@@ -301,7 +301,7 @@ define i8 @test_i8_224_mask_ashr_1(i8 %a0) {
 define i8 @test_i8_224_mask_ashr_4(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_ashr_4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $-32, %al
 ; X86-NEXT:    sarb $4, %al
 ; X86-NEXT:    retl
@@ -320,7 +320,7 @@ define i8 @test_i8_224_mask_ashr_4(i8 %a0) {
 define i8 @test_i8_224_mask_ashr_5(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_ashr_5:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    sarb $5, %al
 ; X86-NEXT:    retl
 ;
@@ -337,7 +337,7 @@ define i8 @test_i8_224_mask_ashr_5(i8 %a0) {
 define i8 @test_i8_224_mask_ashr_6(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_ashr_6:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    sarb $6, %al
 ; X86-NEXT:    retl
 ;
@@ -357,7 +357,7 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) {
 define i8 @test_i8_7_mask_shl_1(i8 %a0) {
 ; X86-LABEL: test_i8_7_mask_shl_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $7, %al
 ; X86-NEXT:    addb %al, %al
 ; X86-NEXT:    retl
@@ -376,7 +376,7 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) {
 define i8 @test_i8_7_mask_shl_4(i8 %a0) {
 ; X86-LABEL: test_i8_7_mask_shl_4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $7, %al
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    retl
@@ -395,7 +395,7 @@ define i8 @test_i8_7_mask_shl_4(i8 %a0) {
 define i8 @test_i8_7_mask_shl_5(i8 %a0) {
 ; X86-LABEL: test_i8_7_mask_shl_5:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $5, %al
 ; X86-NEXT:    retl
 ;
@@ -412,7 +412,7 @@ define i8 @test_i8_7_mask_shl_5(i8 %a0) {
 define i8 @test_i8_7_mask_shl_6(i8 %a0) {
 ; X86-LABEL: test_i8_7_mask_shl_6:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $6, %al
 ; X86-NEXT:    retl
 ;
@@ -430,7 +430,7 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) {
 define i8 @test_i8_28_mask_shl_1(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_shl_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    addb %al, %al
 ; X86-NEXT:    retl
@@ -449,7 +449,7 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) {
 define i8 @test_i8_28_mask_shl_2(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_shl_2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    shlb $2, %al
 ; X86-NEXT:    retl
@@ -468,7 +468,7 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) {
 define i8 @test_i8_28_mask_shl_3(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_shl_3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    shlb $3, %al
 ; X86-NEXT:    retl
@@ -487,7 +487,7 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) {
 define i8 @test_i8_28_mask_shl_4(i8 %a0) {
 ; X86-LABEL: test_i8_28_mask_shl_4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $12, %al
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    retl
@@ -507,7 +507,7 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) {
 define i8 @test_i8_224_mask_shl_1(i8 %a0) {
 ; X86-LABEL: test_i8_224_mask_shl_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $96, %al
 ; X86-NEXT:    addb %al, %al
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index cf72a34831371..5dfe464b4067e 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -18,7 +18,7 @@ declare dso_local void @external(i32)
 define dso_local i32 @test1() nounwind {
 ; X32-LABEL: test1:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    movzbl b, %ecx
+; X32-NEXT:    movb b, %cl
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    incb %al
 ; X32-NEXT:    movb %al, b
@@ -44,12 +44,12 @@ define dso_local i32 @test1() nounwind {
 ; X64-LABEL: test1:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    pushq %rax
-; X64-NEXT:    movzbl b(%rip), %ecx
+; X64-NEXT:    movb b(%rip), %cl
 ; X64-NEXT:    leal 1(%rcx), %eax
 ; X64-NEXT:    movb %al, b(%rip)
 ; X64-NEXT:    incl c(%rip)
 ; X64-NEXT:    sete %dl
-; X64-NEXT:    movzbl a(%rip), %esi
+; X64-NEXT:    movb a(%rip), %sil
 ; X64-NEXT:    leal 1(%rsi), %edi
 ; X64-NEXT:    cmpb %cl, %sil
 ; X64-NEXT:    sete d(%rip)

diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index 1b73acbcb6828..cb91f109b0074 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -966,7 +966,7 @@ define i32 @multiple_bb(i32 %x, i32 %y, ptr %divdst, i1 zeroext %store_srem, ptr
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    cltd

diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 6643ada2f42b4..f38ed2e1bf84e 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -966,7 +966,7 @@ define i32 @multiple_bb(i32 %x, i32 %y, ptr %divdst, i1 zeroext %store_urem, ptr
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    xorl %edx, %edx

diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index 3958d5f85e20b..6cb36ea5609b7 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -162,7 +162,7 @@ define i32 @test7(i32 %x) nounwind {
 define i8 @test8(i8 %x) nounwind {
 ; X32-LABEL: test8:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    shrb %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    imull $211, %eax, %eax
@@ -185,7 +185,7 @@ define i8 @test8(i8 %x) nounwind {
 define i8 @test9(i8 %x) nounwind {
 ; X32-LABEL: test9:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    shrb $2, %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    imull $71, %eax, %eax

diff --git a/llvm/test/CodeGen/X86/divrem8_ext.ll b/llvm/test/CodeGen/X86/divrem8_ext.ll
index c722b827cf736..12dbe164626b0 100644
--- a/llvm/test/CodeGen/X86/divrem8_ext.ll
+++ b/llvm/test/CodeGen/X86/divrem8_ext.ll
@@ -49,7 +49,7 @@ define zeroext i8 @test_urem_zext_ah(i8 %x, i8 %y) {
 define i8 @test_urem_noext_ah(i8 %x, i8 %y) {
 ; X32-LABEL: test_urem_noext_ah:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    divb %cl
 ; X32-NEXT:    movzbl %ah, %eax
@@ -137,7 +137,7 @@ define signext i8 @test_srem_sext_ah(i8 %x, i8 %y) {
 define i8 @test_srem_noext_ah(i8 %x, i8 %y) {
 ; X32-LABEL: test_srem_noext_ah:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    idivb %cl
 ; X32-NEXT:    movsbl %ah, %eax

diff --git a/llvm/test/CodeGen/X86/emutls.ll b/llvm/test/CodeGen/X86/emutls.ll
index a7fbc2d8531d3..48bd2ae624062 100644
--- a/llvm/test/CodeGen/X86/emutls.ll
+++ b/llvm/test/CodeGen/X86/emutls.ll
@@ -211,7 +211,7 @@ define dso_local i8 @f13() {
 ; X86-LABEL: f13:
 ; X86:         movl $__emutls_v.b1, (%esp)
 ; X86-NEXT:    calll __emutls_get_address
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll
index 43d2ad4299b88..c056a74cc183c 100644
--- a/llvm/test/CodeGen/X86/extract-bits.ll
+++ b/llvm/test/CodeGen/X86/extract-bits.ll
@@ -34,8 +34,8 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_a0:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    movl $1, %eax
@@ -48,7 +48,7 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bextr32_a0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    orl %eax, %ecx
@@ -57,8 +57,8 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bextr32_a0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -99,8 +99,8 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n
 ; X86-NOBMI-LABEL: bextr32_a0_arithmetic:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    sarl %cl, %esi
 ; X86-NOBMI-NEXT:    movl $1, %eax
@@ -113,8 +113,8 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n
 ;
 ; X86-BMI1-LABEL: bextr32_a0_arithmetic:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    sarl %cl, %edx
 ; X86-BMI1-NEXT:    shll $8, %eax
@@ -123,8 +123,8 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n
 ;
 ; X86-BMI2-LABEL: bextr32_a0_arithmetic:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    sarxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -166,8 +166,8 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-NOBMI-LABEL: bextr32_a1_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    movl $1, %eax
@@ -180,7 +180,7 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ;
 ; X86-BMI1-LABEL: bextr32_a1_indexzext:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    orl %eax, %ecx
@@ -189,8 +189,8 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ;
 ; X86-BMI2-LABEL: bextr32_a1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -233,8 +233,8 @@ define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-LABEL: bextr32_a2_load:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -249,7 +249,7 @@ define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1-LABEL: bextr32_a2_load:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    orl %ecx, %edx
@@ -258,9 +258,9 @@ define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ;
 ; X86-BMI2-LABEL: bextr32_a2_load:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -303,8 +303,8 @@ define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; X86-NOBMI-LABEL: bextr32_a3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -319,7 +319,7 @@ define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; X86-BMI1-LABEL: bextr32_a3_load_indexzext:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    orl %ecx, %edx
@@ -328,9 +328,9 @@ define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ;
 ; X86-BMI2-LABEL: bextr32_a3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -375,8 +375,8 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-NOBMI-LABEL: bextr32_a4_commutative:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    movl $1, %eax
@@ -389,7 +389,7 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ;
 ; X86-BMI1-LABEL: bextr32_a4_commutative:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    orl %eax, %ecx
@@ -398,8 +398,8 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ;
 ; X86-BMI2-LABEL: bextr32_a4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -442,7 +442,7 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl %eax, %ecx
@@ -464,7 +464,7 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $8, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl %al, %edx
@@ -481,7 +481,7 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $8, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    bzhil %eax, %edx, %esi
@@ -619,8 +619,8 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
@@ -763,8 +763,8 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) n
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
@@ -907,8 +907,8 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
@@ -1055,8 +1055,8 @@ define i64 @bextr64_a2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl (%eax), %esi
 ; X86-BMI2-NEXT:    movl 4(%eax), %eax
@@ -1202,8 +1202,8 @@ define i64 @bextr64_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl (%eax), %esi
 ; X86-BMI2-NEXT:    movl 4(%eax), %eax
@@ -1351,8 +1351,8 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -1423,7 +1423,7 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $12, %esp
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1473,7 +1473,7 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $12, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1523,7 +1523,7 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $12, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1620,8 +1620,8 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl %edi, %esi
@@ -1651,8 +1651,8 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %esi
@@ -1681,8 +1681,8 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI2-LABEL: bextr64_32_a0:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
@@ -1744,8 +1744,8 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl %edi, %esi
@@ -1769,8 +1769,8 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -1790,8 +1790,8 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI2-LABEL: bextr64_32_a1:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
@@ -1846,8 +1846,8 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl %edx, %esi
@@ -1875,8 +1875,8 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    pushl %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl %edx, %esi
@@ -1901,8 +1901,8 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    pushl %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
@@ -1990,8 +1990,8 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl %edi, %esi
@@ -2015,8 +2015,8 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -2036,8 +2036,8 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI2-LABEL: bextr64_32_a2:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
@@ -2092,8 +2092,8 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl %edi, %esi
@@ -2123,8 +2123,8 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %esi
@@ -2153,8 +2153,8 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI2-LABEL: bextr64_32_a3:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
@@ -2218,8 +2218,8 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_b0:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    movl $-1, %eax
@@ -2232,7 +2232,7 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bextr32_b0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    orl %eax, %ecx
@@ -2241,8 +2241,8 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bextr32_b0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -2283,8 +2283,8 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-NOBMI-LABEL: bextr32_b1_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    movl $-1, %eax
@@ -2297,7 +2297,7 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ;
 ; X86-BMI1-LABEL: bextr32_b1_indexzext:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    orl %eax, %ecx
@@ -2306,8 +2306,8 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ;
 ; X86-BMI2-LABEL: bextr32_b1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -2350,8 +2350,8 @@ define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-LABEL: bextr32_b2_load:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -2366,7 +2366,7 @@ define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1-LABEL: bextr32_b2_load:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    orl %ecx, %edx
@@ -2375,9 +2375,9 @@ define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ;
 ; X86-BMI2-LABEL: bextr32_b2_load:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -2420,8 +2420,8 @@ define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; X86-NOBMI-LABEL: bextr32_b3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -2436,7 +2436,7 @@ define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; X86-BMI1-LABEL: bextr32_b3_load_indexzext:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    orl %ecx, %edx
@@ -2445,9 +2445,9 @@ define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ;
 ; X86-BMI2-LABEL: bextr32_b3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -2492,8 +2492,8 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-NOBMI-LABEL: bextr32_b4_commutative:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    movl $-1, %eax
@@ -2506,7 +2506,7 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ;
 ; X86-BMI1-LABEL: bextr32_b4_commutative:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    orl %eax, %ecx
@@ -2515,8 +2515,8 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ;
 ; X86-BMI2-LABEL: bextr32_b4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -2559,7 +2559,7 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl %eax, %ecx
@@ -2581,7 +2581,7 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $8, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl %al, %edx
@@ -2598,7 +2598,7 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $8, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    bzhil %eax, %edx, %esi
@@ -2704,8 +2704,8 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -2738,8 +2738,8 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -2842,8 +2842,8 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -2876,8 +2876,8 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -2985,8 +2985,8 @@ define i64 @bextr64_b2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl (%edx), %esi
 ; X86-BMI1-NEXT:    movl 4(%edx), %edi
@@ -3020,8 +3020,8 @@ define i64 @bextr64_b2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl (%edx), %eax
 ; X86-BMI2-NEXT:    movl 4(%edx), %esi
@@ -3128,8 +3128,8 @@ define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl (%edx), %esi
 ; X86-BMI1-NEXT:    movl 4(%edx), %edi
@@ -3163,8 +3163,8 @@ define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl (%edx), %eax
 ; X86-BMI2-NEXT:    movl 4(%edx), %esi
@@ -3274,8 +3274,8 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -3308,8 +3308,8 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -3426,7 +3426,7 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $12, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3473,7 +3473,7 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $12, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -3565,8 +3565,8 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl %edi, %eax
@@ -3596,8 +3596,8 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -3625,8 +3625,8 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bextr64_32_b0:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
@@ -3688,8 +3688,8 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl %edi, %esi
@@ -3713,8 +3713,8 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -3734,8 +3734,8 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bextr64_32_b1:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
@@ -3790,8 +3790,8 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl %edi, %esi
@@ -3815,8 +3815,8 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -3836,8 +3836,8 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bextr64_32_b2:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
@@ -3893,8 +3893,8 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl %edi, %eax
@@ -3924,8 +3924,8 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -3953,8 +3953,8 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bextr64_32_b3:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
@@ -4021,7 +4021,7 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
@@ -4043,7 +4043,7 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    pushl %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    shrl %cl, %edi
 ; X86-BMI1-NEXT:    xorl %ecx, %ecx
@@ -4065,8 +4065,8 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    pushl %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI2-NEXT:    negb %al
@@ -4155,7 +4155,7 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
@@ -4177,7 +4177,7 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    pushl %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    shrl %cl, %edi
 ; X86-BMI1-NEXT:    xorl %ecx, %ecx
@@ -4199,8 +4199,8 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    pushl %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI2-NEXT:    negb %al
@@ -4291,7 +4291,7 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
@@ -4314,7 +4314,7 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    pushl %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl (%eax), %edi
 ; X86-BMI1-NEXT:    shrl %cl, %edi
@@ -4337,9 +4337,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    pushl %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %esi
 ; X86-BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI2-NEXT:    negb %al
@@ -4429,7 +4429,7 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
@@ -4452,7 +4452,7 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    pushl %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl (%eax), %edi
 ; X86-BMI1-NEXT:    shrl %cl, %edi
@@ -4475,9 +4475,9 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    pushl %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %esi
 ; X86-BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI2-NEXT:    negb %al
@@ -4569,7 +4569,7 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
@@ -4591,7 +4591,7 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    pushl %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    shrl %cl, %edi
 ; X86-BMI1-NEXT:    xorl %ecx, %ecx
@@ -4613,8 +4613,8 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    pushl %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI2-NEXT:    negb %al
@@ -4758,7 +4758,7 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $16, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI2-NEXT:    shrxl %edi, {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl %ebx, %eax
@@ -4865,7 +4865,7 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $12, %esp
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl %eax, %edi
@@ -4911,7 +4911,7 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $12, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl %eax, %edi
@@ -4957,7 +4957,7 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $12, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
@@ -5071,7 +5071,7 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $12, %esp
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl %eax, %edi
@@ -5117,7 +5117,7 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $12, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl %eax, %edi
@@ -5163,7 +5163,7 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $12, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
@@ -5280,7 +5280,7 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $12, %esp
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %esi
 ; X86-NOBMI-NEXT:    movl 4(%eax), %eax
@@ -5327,7 +5327,7 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $12, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl (%eax), %esi
 ; X86-BMI1-NEXT:    movl 4(%eax), %eax
@@ -5374,7 +5374,7 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $12, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl (%eax), %esi
 ; X86-BMI2-NEXT:    movl 4(%eax), %eax
@@ -5490,7 +5490,7 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $12, %esp
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %esi
 ; X86-NOBMI-NEXT:    movl 4(%eax), %eax
@@ -5537,7 +5537,7 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $12, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl (%eax), %esi
 ; X86-BMI1-NEXT:    movl 4(%eax), %eax
@@ -5584,7 +5584,7 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $12, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl (%eax), %esi
 ; X86-BMI2-NEXT:    movl 4(%eax), %eax
@@ -5703,7 +5703,7 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $12, %esp
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl %eax, %edi
@@ -5749,7 +5749,7 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $12, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl %eax, %edi
@@ -5795,7 +5795,7 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $12, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
@@ -6138,7 +6138,7 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-LABEL: bextr64_32_c0:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl %esi, %edx
@@ -6165,7 +6165,7 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1-LABEL: bextr64_32_c0:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl %esi, %edx
@@ -6191,7 +6191,7 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ;
 ; X86-BMI2-LABEL: bextr64_32_c0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
@@ -6251,7 +6251,7 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-LABEL: bextr64_32_c1:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl %esi, %eax
@@ -6274,8 +6274,8 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -6295,8 +6295,8 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI2-LABEL: bextr64_32_c1:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
@@ -6350,7 +6350,7 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-LABEL: bextr64_32_c2:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl %esi, %eax
@@ -6373,8 +6373,8 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -6394,8 +6394,8 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI2-LABEL: bextr64_32_c2:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
@@ -6450,7 +6450,7 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-LABEL: bextr64_32_c3:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl %esi, %edx
@@ -6478,7 +6478,7 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1-LABEL: bextr64_32_c3:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl %esi, %edx
@@ -6506,7 +6506,7 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI2-LABEL: bextr64_32_c3:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
@@ -6579,7 +6579,7 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_d0:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
@@ -6591,7 +6591,7 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bextr32_d0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    orl %eax, %ecx
@@ -6600,8 +6600,8 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bextr32_d0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -6641,7 +6641,7 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_d1_indexzext:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
@@ -6653,7 +6653,7 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ;
 ; X86-BMI1-LABEL: bextr32_d1_indexzext:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    orl %eax, %ecx
@@ -6662,8 +6662,8 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ;
 ; X86-BMI2-LABEL: bextr32_d1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -6705,7 +6705,7 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_d2_load:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
@@ -6719,7 +6719,7 @@ define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1-LABEL: bextr32_d2_load:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    orl %ecx, %edx
@@ -6728,9 +6728,9 @@ define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ;
 ; X86-BMI2-LABEL: bextr32_d2_load:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -6771,7 +6771,7 @@ define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_d3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
@@ -6785,7 +6785,7 @@ define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-BMI1-LABEL: bextr32_d3_load_indexzext:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    orl %ecx, %edx
@@ -6794,9 +6794,9 @@ define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ;
 ; X86-BMI2-LABEL: bextr32_d3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
 ; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -6861,7 +6861,7 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    subl $8, %esp
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    movzbl %al, %edx
@@ -6878,7 +6878,7 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    subl $8, %esp
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    bzhil %eax, %edx, %esi
@@ -6944,7 +6944,7 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl %edx, %eax
@@ -6992,7 +6992,7 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl %edx, %eax
@@ -7039,7 +7039,7 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -7114,7 +7114,7 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl %edx, %eax
@@ -7162,7 +7162,7 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl %edx, %eax
@@ -7209,7 +7209,7 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -7288,7 +7288,7 @@ define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %edi
 ; X86-NOBMI-NEXT:    movl 4(%eax), %edx
@@ -7337,7 +7337,7 @@ define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl (%eax), %edi
 ; X86-BMI1-NEXT:    movl 4(%eax), %edx
@@ -7385,7 +7385,7 @@ define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl (%edx), %eax
 ; X86-BMI2-NEXT:    movl 4(%edx), %edx
@@ -7462,7 +7462,7 @@ define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %edi
 ; X86-NOBMI-NEXT:    movl 4(%eax), %edx
@@ -7511,7 +7511,7 @@ define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-BMI1-NEXT:    pushl %ebx
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl (%eax), %edi
 ; X86-BMI1-NEXT:    movl 4(%eax), %edx
@@ -7559,7 +7559,7 @@ define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %edi
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl (%edx), %eax
 ; X86-BMI2-NEXT:    movl 4(%edx), %edx
@@ -7863,7 +7863,7 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-LABEL: bextr64_32_d0:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl %esi, %eax
@@ -7898,7 +7898,7 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1-LABEL: bextr64_32_d0:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl %esi, %eax
@@ -7932,7 +7932,7 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ;
 ; X86-BMI2-LABEL: bextr64_32_d0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -8002,7 +8002,7 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-LABEL: bextr64_32_d1:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl %esi, %eax
@@ -8025,8 +8025,8 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %edi
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1-NEXT:    movl %edi, %edx
@@ -8046,8 +8046,8 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI2-LABEL: bextr64_32_d1:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx

diff --git a/llvm/test/CodeGen/X86/extract-insert.ll b/llvm/test/CodeGen/X86/extract-insert.ll
index 23d66b2d77f35..8960fe96bf2b1 100644
--- a/llvm/test/CodeGen/X86/extract-insert.ll
+++ b/llvm/test/CodeGen/X86/extract-insert.ll
@@ -15,7 +15,7 @@ define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) {
 define i8 @extractelt_bitcast(i32 %x) nounwind {
 ; X86-LABEL: extractelt_bitcast:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: extractelt_bitcast:
@@ -87,7 +87,7 @@ define i16 @trunc_i64_to_i16_le(i64 %x) {
 define i8 @trunc_i32_to_i8_le(i32 %x) {
 ; X86-LABEL: trunc_i32_to_i8_le:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: trunc_i32_to_i8_le:

diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index 823e26d63e15a..0a4722b34369f 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -30,7 +30,7 @@
 define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_a0:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    decl %eax
@@ -39,14 +39,14 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi32_a0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi32_a0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -79,7 +79,7 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
 define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_a1_indexzext:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    decl %eax
@@ -88,14 +88,14 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi32_a1_indexzext:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi32_a1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -130,7 +130,7 @@ define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_a2_load:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    decl %eax
@@ -140,7 +140,7 @@ define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-BMI1-LABEL: bzhi32_a2_load:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
 ; X86-BMI1-NEXT:    retl
@@ -148,7 +148,7 @@ define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bzhi32_a2_load:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -183,7 +183,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    decl %eax
@@ -193,7 +193,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1-LABEL: bzhi32_a3_load_indexzext:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
 ; X86-BMI1-NEXT:    retl
@@ -201,7 +201,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bzhi32_a3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -236,7 +236,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_a4_commutative:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    decl %eax
@@ -245,14 +245,14 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi32_a4_commutative:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi32_a4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -287,7 +287,7 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
 define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_a0:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
@@ -306,7 +306,7 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_a0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
 ; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
@@ -325,7 +325,7 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_a0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $1, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -371,7 +371,7 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
 define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
@@ -390,7 +390,7 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_a1_indexzext:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
 ; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
@@ -409,7 +409,7 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_a1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $1, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -460,7 +460,7 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
@@ -482,7 +482,7 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
 ; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
@@ -504,7 +504,7 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $1, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -554,7 +554,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
@@ -576,7 +576,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
 ; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
@@ -598,7 +598,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $1, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -649,7 +649,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_a4_commutative:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
@@ -668,7 +668,7 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_a4_commutative:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
 ; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
@@ -687,7 +687,7 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_a4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $1, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -736,7 +736,7 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_a0:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %edx
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
@@ -751,7 +751,7 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_a0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %edx
 ; X86-BMI1-NEXT:    shll %cl, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
@@ -766,7 +766,7 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_32_a0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    xorl %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
 ; X86-BMI2-NEXT:    jne .LBB10_2
@@ -810,7 +810,7 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_a1:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    decl %eax
@@ -819,14 +819,14 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_a1:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi64_32_a1:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -862,7 +862,7 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
 define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl %edx, (%eax)
@@ -874,7 +874,7 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape)
 ;
 ; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl %ecx, (%edx)
@@ -884,7 +884,7 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape)
 ;
 ; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    movl %ecx, (%edx)
@@ -927,7 +927,7 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape)
 define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_a2:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    decl %eax
@@ -936,14 +936,14 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_a2:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi64_32_a2:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -980,7 +980,7 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
 define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_a3:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $1, %edx
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
@@ -995,7 +995,7 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_a3:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %edx
 ; X86-BMI1-NEXT:    shll %cl, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
@@ -1010,7 +1010,7 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_32_a3:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    xorl %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
 ; X86-BMI2-NEXT:    jne .LBB14_2
@@ -1057,7 +1057,7 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_b0:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    notl %eax
@@ -1066,14 +1066,14 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi32_b0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi32_b0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -1106,7 +1106,7 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
 define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_b1_indexzext:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    notl %eax
@@ -1115,14 +1115,14 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi32_b1_indexzext:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi32_b1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -1157,7 +1157,7 @@ define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_b2_load:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    notl %eax
@@ -1167,7 +1167,7 @@ define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-BMI1-LABEL: bzhi32_b2_load:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
 ; X86-BMI1-NEXT:    retl
@@ -1175,7 +1175,7 @@ define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bzhi32_b2_load:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -1210,7 +1210,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    notl %eax
@@ -1220,7 +1220,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1-LABEL: bzhi32_b3_load_indexzext:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
 ; X86-BMI1-NEXT:    retl
@@ -1228,7 +1228,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bzhi32_b3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -1263,7 +1263,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_b4_commutative:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    notl %eax
@@ -1272,14 +1272,14 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi32_b4_commutative:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi32_b4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -1315,7 +1315,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_b0:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %esi
@@ -1337,7 +1337,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_b0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %edx
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
@@ -1353,7 +1353,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_b0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
 ; X86-BMI2-NEXT:    testb $32, %dl
@@ -1396,7 +1396,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %esi
@@ -1418,7 +1418,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_b1_indexzext:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %edx
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
@@ -1434,7 +1434,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_b1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
 ; X86-BMI2-NEXT:    testb $32, %dl
@@ -1482,7 +1482,7 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %edi
 ; X86-NOBMI-NEXT:    shll %cl, %edi
@@ -1507,7 +1507,7 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %esi
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
@@ -1526,7 +1526,7 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %bl
@@ -1573,7 +1573,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %edi
 ; X86-NOBMI-NEXT:    shll %cl, %edi
@@ -1598,7 +1598,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %esi
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
@@ -1617,7 +1617,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %ebx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %bl
@@ -1665,7 +1665,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_b4_commutative:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %esi
@@ -1687,7 +1687,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_b4_commutative:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %edx
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
@@ -1703,7 +1703,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_b4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
 ; X86-BMI2-NEXT:    testb $32, %dl
@@ -1748,7 +1748,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_b0:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
@@ -1763,7 +1763,7 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_b0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
@@ -1777,7 +1777,7 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_32_b0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI2-NEXT:    testb $32, %al
 ; X86-BMI2-NEXT:    jne .LBB25_2
@@ -1821,7 +1821,7 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_b1:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    notl %eax
@@ -1830,14 +1830,14 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_b1:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi64_32_b1:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -1874,7 +1874,7 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
 define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_b2:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    notl %eax
@@ -1883,14 +1883,14 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_b2:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi64_32_b2:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -1928,7 +1928,7 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
 define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_32_b3:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
@@ -1943,7 +1943,7 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_b3:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
@@ -1957,7 +1957,7 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi64_32_b3:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI2-NEXT:    testb $32, %al
 ; X86-BMI2-NEXT:    jne .LBB28_2
@@ -2031,7 +2031,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
 ; X86-BMI2-NEXT:    negb %dl
@@ -2108,7 +2108,7 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr %escape) nounwind
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
 ; X86-BMI2-NEXT:    negb %dl
@@ -2195,7 +2195,7 @@ define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
 ; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
 ; X86-BMI2-NEXT:    negb %dl
@@ -2284,7 +2284,7 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwi
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
 ; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
 ; X86-BMI2-NEXT:    negb %dl
@@ -2365,7 +2365,7 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr %escape) nounwi
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
 ; X86-BMI2-NEXT:    negb %dl
@@ -3070,14 +3070,14 @@ define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_c1:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi64_32_c1:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -3124,14 +3124,14 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_c2:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi64_32_c2:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -3236,14 +3236,14 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi32_d0:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi32_d0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -3286,14 +3286,14 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi32_d1_indexzext:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi32_d1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -3339,7 +3339,7 @@ define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-BMI1-LABEL: bzhi32_d2_load:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
 ; X86-BMI1-NEXT:    retl
@@ -3347,7 +3347,7 @@ define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bzhi32_d2_load:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -3393,7 +3393,7 @@ define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-BMI1-LABEL: bzhi32_d3_load_indexzext:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    shll $8, %ecx
 ; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
 ; X86-BMI1-NEXT:    retl
@@ -3401,7 +3401,7 @@ define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-BMI2-LABEL: bzhi32_d3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -4119,14 +4119,14 @@ define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
 ;
 ; X86-BMI1-LABEL: bzhi64_32_d1:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1-NEXT:    shll $8, %eax
 ; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: bzhi64_32_d1:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;

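Note: every extract-lowbits (bzhi) hunk above toggles the same zero-extending byte load of %numlowbits. As a reference for the pattern under test, a minimal IR sketch (hypothetical name, not part of this diff) of the "a" variant, val & ((1 << numlowbits) - 1), which BMI2 targets lower to the bzhil shown in the checks:

  define i32 @bzhi32_sketch(i32 %val, i32 %numlowbits) nounwind {
    %onebit = shl i32 1, %numlowbits   ; 1 << numlowbits
    %mask = add i32 %onebit, -1        ; low-bits mask
    %masked = and i32 %mask, %val      ; extract the low bits
    ret i32 %masked
  }
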
diff  --git a/llvm/test/CodeGen/X86/extractelement-index.ll b/llvm/test/CodeGen/X86/extractelement-index.ll
index 077351b9718d5..99297b435b2a9 100644
--- a/llvm/test/CodeGen/X86/extractelement-index.ll
+++ b/llvm/test/CodeGen/X86/extractelement-index.ll
@@ -427,14 +427,14 @@ define i8 @extractelement_v16i8_var(<16 x i8> %a, i256 %i) nounwind {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    andl $15, %edi
 ; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movzbl -24(%rsp,%rdi), %eax
+; SSE-NEXT:    movb -24(%rsp,%rdi), %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extractelement_v16i8_var:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    andl $15, %edi
 ; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT:    movzbl -24(%rsp,%rdi), %eax
+; AVX-NEXT:    movb -24(%rsp,%rdi), %al
 ; AVX-NEXT:    retq
   %b = extractelement <16 x i8> %a, i256 %i
   ret i8 %b
@@ -446,7 +446,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
 ; SSE-NEXT:    andl $31, %edi
 ; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT:    movzbl -40(%rsp,%rdi), %eax
+; SSE-NEXT:    movb -40(%rsp,%rdi), %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: extractelement_v32i8_var:
@@ -457,7 +457,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind {
 ; AVX-NEXT:    subq $64, %rsp
 ; AVX-NEXT:    andl $31, %edi
 ; AVX-NEXT:    vmovaps %ymm0, (%rsp)
-; AVX-NEXT:    movzbl (%rsp,%rdi), %eax
+; AVX-NEXT:    movb (%rsp,%rdi), %al
 ; AVX-NEXT:    movq %rbp, %rsp
 ; AVX-NEXT:    popq %rbp
 ; AVX-NEXT:    vzeroupper

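Note: the extractelement hunks change only the final byte reload. A non-constant index cannot be selected to a register extract, so the backend clamps the index, spills the vector to a stack slot, and loads one byte back; a sketch (hypothetical name) of that shape:

  define i8 @extract_var_sketch(<16 x i8> %a, i64 %i) nounwind {
    ; variable index: %a is stored to the stack and a single byte is reloaded
    %b = extractelement <16 x i8> %a, i64 %i
    ret i8 %b
  }
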
diff  --git a/llvm/test/CodeGen/X86/fast-isel-call-bool.ll b/llvm/test/CodeGen/X86/fast-isel-call-bool.ll
index b9a5cecdf6449..43ea84bfaf085 100644
--- a/llvm/test/CodeGen/X86/fast-isel-call-bool.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-call-bool.ll
@@ -9,7 +9,7 @@ define i64 @foo(ptr %arg) {
 ; CHECK-LABEL: foo:
 top:
   %0 = load i8, ptr %arg
-; CHECK: movzbl
+; CHECK: movb
   %1 = trunc i8 %0 to i1
 ; CHECK: andb $1,
   %2 = call i64 @bar(i1 %1)

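Note: the checked movzbl/movb here is the plain i8 load; the andb $1 that follows comes from passing the truncated bit as a zeroext i1 argument. A self-contained sketch (hypothetical names) of the same shape:

  declare i64 @use_bool(i1 zeroext)

  define i64 @bool_sketch(ptr %arg) nounwind {
    %v = load i8, ptr %arg, align 1         ; the byte load this commit toggles
    %b = trunc i8 %v to i1                  ; clang passes booleans as i8
    %r = call i64 @use_bool(i1 zeroext %b)  ; zeroext forces the andb $1
    ret i64 %r
  }
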
diff  --git a/llvm/test/CodeGen/X86/fast-isel-i1.ll b/llvm/test/CodeGen/X86/fast-isel-i1.ll
index f4c658b731dad..24b6bffc1a3d5 100644
--- a/llvm/test/CodeGen/X86/fast-isel-i1.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-i1.ll
@@ -25,7 +25,7 @@ define void @test2(ptr %a) nounwind {
 entry:
 ; clang uses i8 constants for booleans, so we test with an i8 1.
 ; CHECK-LABEL: test2:
-; CHECK: movzbl {{.*}} %eax
+; CHECK: movb {{.*}} %al
 ; CHECK-NEXT: xorb $1, %al
 ; CHECK-NEXT: testb $1
   %tmp = load i8, ptr %a, align 1

diff  --git a/llvm/test/CodeGen/X86/fast-isel-sext-zext.ll b/llvm/test/CodeGen/X86/fast-isel-sext-zext.ll
index 7dae049d8e023..9a83db52011a9 100644
--- a/llvm/test/CodeGen/X86/fast-isel-sext-zext.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-sext-zext.ll
@@ -5,7 +5,7 @@
 define i8 @test1(i8 %x) nounwind {
 ; X32-LABEL: test1:
 ; X32:       ## %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    negb %al
 ; X32-NEXT:    retl
@@ -87,7 +87,7 @@ define i32 @test4(i32 %x) nounwind {
 define i8 @test5(i8 %x) nounwind {
 ; X32-LABEL: test5:
 ; X32:       ## %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    retl
 ;

diff  --git a/llvm/test/CodeGen/X86/fixup-bw-copy.ll b/llvm/test/CodeGen/X86/fixup-bw-copy.ll
index 73907d336b194..f73b0c840c1f7 100644
--- a/llvm/test/CodeGen/X86/fixup-bw-copy.ll
+++ b/llvm/test/CodeGen/X86/fixup-bw-copy.ll
@@ -13,15 +13,10 @@ define i8 @test_movb(i8 %a0) nounwind {
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
-; BWON32-LABEL: test_movb:
-; BWON32:       # %bb.0:
-; BWON32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; BWON32-NEXT:    retl
-;
-; BWOFF32-LABEL: test_movb:
-; BWOFF32:       # %bb.0:
-; BWOFF32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; BWOFF32-NEXT:    retl
+; X32-LABEL: test_movb:
+; X32:       # %bb.0:
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    retl
   ret i8 %a0
 }
 

diff  --git a/llvm/test/CodeGen/X86/fixup-bw-inst.ll b/llvm/test/CodeGen/X86/fixup-bw-inst.ll
index 6c371e22b4e6e..76c089612b85a 100644
--- a/llvm/test/CodeGen/X86/fixup-bw-inst.ll
+++ b/llvm/test/CodeGen/X86/fixup-bw-inst.ll
@@ -96,11 +96,11 @@ a4:                                       ; preds = %3, %.lr.ph
   ret void
 }
 
-; This test contains nothing but a simple byte load and store.
-; movb encodes smaller, but we use movzbl for the load for better perf.
+; This test contains nothing but a simple byte load and store.  Since
+; movb encodes smaller, we do not want to use movzbl unless in a tight loop.
+; So this test checks that movb is used.
 ; CHECK-LABEL: foo3:
-; BWON:  movzbl
-; BWOFF: movb
+; CHECK: movb
 ; CHECK: movb
 define void @foo3(ptr%dst, ptr%src) {
   %t0 = load i8, ptr%src, align 1

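Note: the restored comment above spells out the tradeoff behind this revert: movb is the smaller encoding, while movzbl writes the full 32-bit register and so avoids a false dependence on the old value of %eax (the reverted patch preferred movzbl even outside loops). The function under test reduces to this sketch (foo3 in the test is the authoritative version):

  define void @byte_copy_sketch(ptr %dst, ptr %src) nounwind {
    %t0 = load i8, ptr %src, align 1  ; movb (smaller) vs. movzbl (no partial write)
    store i8 %t0, ptr %dst, align 1   ; the store is movb either way
    ret void
  }
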
diff  --git a/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll b/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
index 62abc4e035f4a..a0fb498ad4fa0 100644
--- a/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
+++ b/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
@@ -5,7 +5,7 @@ define i8 @t1(ptr %X, i64 %i) {
 ; CHECK-LABEL: t1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andq $-255, %rsi
-; CHECK-NEXT:    movzbl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    movb (%rdi,%rsi,4), %al
 ; CHECK-NEXT:    retq
 
 entry:
@@ -20,7 +20,7 @@ define i8 @t2(ptr %X, i64 %i) {
 ; CHECK-LABEL: t2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andq $-14, %rsi
-; CHECK-NEXT:    movzbl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    movb (%rdi,%rsi,4), %al
 ; CHECK-NEXT:    retq
 
 entry:
@@ -35,7 +35,7 @@ define i8 @t3(ptr %X, i64 %i) {
 ; CHECK-LABEL: t3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    movzbl (%rdi,%rax,4), %eax
+; CHECK-NEXT:    movb (%rdi,%rax,4), %al
 ; CHECK-NEXT:    retq
 
 entry:
@@ -50,7 +50,7 @@ define i8 @t4(ptr %X, i64 %i) {
 ; CHECK-LABEL: t4:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andl $-2, %esi
-; CHECK-NEXT:    movzbl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    movb (%rdi,%rsi,4), %al
 ; CHECK-NEXT:    retq
 
 entry:
@@ -65,7 +65,7 @@ define i8 @t5(ptr %X, i64 %i) {
 ; CHECK-LABEL: t5:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andl $-250002, %esi # imm = 0xFFFC2F6E
-; CHECK-NEXT:    movzbl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    movb (%rdi,%rsi,4), %al
 ; CHECK-NEXT:    retq
 
 entry:
@@ -81,7 +81,7 @@ define i8 @t6(ptr %X, i32 %i) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
 ; CHECK-NEXT:    andl $15, %esi
-; CHECK-NEXT:    movzbl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    movb (%rdi,%rsi,4), %al
 ; CHECK-NEXT:    retq
 entry:
   %tmp2 = shl i32 %i, 2

diff  --git a/llvm/test/CodeGen/X86/fold-and-shift.ll b/llvm/test/CodeGen/X86/fold-and-shift.ll
index 1318de65df302..8e1204a6292aa 100644
--- a/llvm/test/CodeGen/X86/fold-and-shift.ll
+++ b/llvm/test/CodeGen/X86/fold-and-shift.ll
@@ -94,7 +94,7 @@ define i8 @t5(ptr %X, i32 %i) {
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    andl $-14, %ecx
-; CHECK-NEXT:    movzbl (%eax,%ecx,4), %eax
+; CHECK-NEXT:    movb (%eax,%ecx,4), %al
 ; CHECK-NEXT:    retl
 
 entry:
@@ -111,7 +111,7 @@ define i8 @t6(ptr %X, i32 %i) {
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl $-255, %ecx
 ; CHECK-NEXT:    andl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movzbl (%eax,%ecx,4), %eax
+; CHECK-NEXT:    movb (%eax,%ecx,4), %al
 ; CHECK-NEXT:    retl
 
 entry:

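Note: in both fold-and-shift files only the final byte load changes; the point of the tests is that the and of the index survives as andl while the shl folds into the address scale. A plausible sketch (hypothetical name; constants chosen to mirror t5, whose exact IR is not visible in this hunk):

  define i8 @fold_sketch(ptr %X, i32 %i) nounwind {
    %idx = shl i32 %i, 2                      ; folds into the *4 scale
    %off = and i32 %idx, -56                  ; -14 << 2; emitted as andl $-14 on %i
    %p = getelementptr i8, ptr %X, i32 %off
    %v = load i8, ptr %p, align 1             ; the byte load toggled above
    ret i8 %v
  }
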
diff  --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index f4689b2ab6bb9..378a32ea61f7c 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -935,7 +935,7 @@ define i8 @f20s8(double %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl
@@ -1207,7 +1207,7 @@ define i8 @f20u8(double %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl

diff  --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
index 25a946465ff3f..9646e7db840c0 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
@@ -67,7 +67,7 @@ define i1 @fptosi_f32toi1(float %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl
@@ -114,7 +114,7 @@ define i8 @fptosi_f32toi8(float %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl
@@ -339,7 +339,7 @@ define i1 @fptoui_f32toi1(float %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl
@@ -386,7 +386,7 @@ define i8 @fptoui_f32toi8(float %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl
@@ -754,7 +754,7 @@ define i8 @fptosi_f64toi8(double %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl
@@ -979,7 +979,7 @@ define i1 @fptoui_f64toi1(double %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl
@@ -1026,7 +1026,7 @@ define i8 @fptoui_f64toi8(double %x) #0 {
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X87-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    addl $8, %esp
 ; X87-NEXT:    .cfi_def_cfa_offset 4
 ; X87-NEXT:    retl

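Note: each X87 hunk in these two files is the same tail: a strict float-to-small-int conversion goes through fistps to a stack slot and the result is reloaded as a byte, which is the load being toggled. A minimal strictfp sketch (hypothetical name) matching the constrained calls visible above:

  declare i8 @llvm.experimental.constrained.fptosi.i8.f64(double, metadata)

  define i8 @strict_fptosi_sketch(double %x) strictfp {
    %r = call i8 @llvm.experimental.constrained.fptosi.i8.f64(double %x,
                                                metadata !"fpexcept.strict") strictfp
    ret i8 %r
  }
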
diff  --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
index 18b6b4ad2055d..7617aee5e5fbf 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
@@ -43,7 +43,7 @@ define half @sitofp_i1tof16(i1 %x) #0 {
 ;
 ; X86-LABEL: sitofp_i1tof16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $1, %al
 ; X86-NEXT:    negb %al
 ; X86-NEXT:    movsbl %al, %eax
@@ -231,7 +231,7 @@ define half @uitofp_i1tof16(i1 %x) #0 {
 ;
 ; X86-LABEL: uitofp_i1tof16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $1, %al
 ; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0

diff  --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
index 4933a870ddd87..738fec62e5f5c 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll
@@ -34,7 +34,7 @@ define float @sitofp_i1tof32(i1 %x) #0 {
 ; SSE-X86:       # %bb.0:
 ; SSE-X86-NEXT:    pushl %eax
 ; SSE-X86-NEXT:    .cfi_def_cfa_offset 8
-; SSE-X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SSE-X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; SSE-X86-NEXT:    andb $1, %al
 ; SSE-X86-NEXT:    negb %al
 ; SSE-X86-NEXT:    movsbl %al, %eax
@@ -58,7 +58,7 @@ define float @sitofp_i1tof32(i1 %x) #0 {
 ; AVX-X86:       # %bb.0:
 ; AVX-X86-NEXT:    pushl %eax
 ; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
-; AVX-X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; AVX-X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; AVX-X86-NEXT:    andb $1, %al
 ; AVX-X86-NEXT:    negb %al
 ; AVX-X86-NEXT:    movsbl %al, %eax
@@ -82,7 +82,7 @@ define float @sitofp_i1tof32(i1 %x) #0 {
 ; X87:       # %bb.0:
 ; X87-NEXT:    pushl %eax
 ; X87-NEXT:    .cfi_def_cfa_offset 8
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    andb $1, %al
 ; X87-NEXT:    negb %al
 ; X87-NEXT:    movsbl %al, %eax
@@ -313,7 +313,7 @@ define float @uitofp_i1tof32(i1 %x) #0 {
 ; SSE-X86:       # %bb.0:
 ; SSE-X86-NEXT:    pushl %eax
 ; SSE-X86-NEXT:    .cfi_def_cfa_offset 8
-; SSE-X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SSE-X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; SSE-X86-NEXT:    andb $1, %al
 ; SSE-X86-NEXT:    movzbl %al, %eax
 ; SSE-X86-NEXT:    cvtsi2ss %eax, %xmm0
@@ -334,7 +334,7 @@ define float @uitofp_i1tof32(i1 %x) #0 {
 ; AVX-X86:       # %bb.0:
 ; AVX-X86-NEXT:    pushl %eax
 ; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
-; AVX-X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; AVX-X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; AVX-X86-NEXT:    andb $1, %al
 ; AVX-X86-NEXT:    movzbl %al, %eax
 ; AVX-X86-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
@@ -355,7 +355,7 @@ define float @uitofp_i1tof32(i1 %x) #0 {
 ; X87:       # %bb.0:
 ; X87-NEXT:    pushl %eax
 ; X87-NEXT:    .cfi_def_cfa_offset 8
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    andb $1, %al
 ; X87-NEXT:    movzbl %al, %eax
 ; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
@@ -965,7 +965,7 @@ define double @uitofp_i1tof64(i1 %x) #0 {
 ; SSE-X86-NEXT:    .cfi_def_cfa_register %ebp
 ; SSE-X86-NEXT:    andl $-8, %esp
 ; SSE-X86-NEXT:    subl $8, %esp
-; SSE-X86-NEXT:    movzbl 8(%ebp), %eax
+; SSE-X86-NEXT:    movb 8(%ebp), %al
 ; SSE-X86-NEXT:    andb $1, %al
 ; SSE-X86-NEXT:    movzbl %al, %eax
 ; SSE-X86-NEXT:    cvtsi2sd %eax, %xmm0
@@ -992,7 +992,7 @@ define double @uitofp_i1tof64(i1 %x) #0 {
 ; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
 ; AVX-X86-NEXT:    andl $-8, %esp
 ; AVX-X86-NEXT:    subl $8, %esp
-; AVX-X86-NEXT:    movzbl 8(%ebp), %eax
+; AVX-X86-NEXT:    movb 8(%ebp), %al
 ; AVX-X86-NEXT:    andb $1, %al
 ; AVX-X86-NEXT:    movzbl %al, %eax
 ; AVX-X86-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
@@ -1014,7 +1014,7 @@ define double @uitofp_i1tof64(i1 %x) #0 {
 ; X87:       # %bb.0:
 ; X87-NEXT:    pushl %eax
 ; X87-NEXT:    .cfi_def_cfa_offset 8
-; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X87-NEXT:    andb $1, %al
 ; X87-NEXT:    movzbl %al, %eax
 ; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)

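Note: the int-to-fp direction shows the converse: the i1 argument is reloaded as a byte (the toggled instruction), masked with andb $1, then widened before the convert. A plain, non-strict sketch (hypothetical name) of the same lowering:

  define float @uitofp_i1_sketch(i1 %x) nounwind {
    %r = uitofp i1 %x to float   ; byte load + andb $1 + movzbl + cvtsi2ss on X86
    ret float %r
  }
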
diff  --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
index b9b1ae60d479e..818ca08ae2d23 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
@@ -242,7 +242,7 @@ define i1 @fp80_to_sint1(x86_fp80 %x) #0 {
 ; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X86-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
@@ -258,7 +258,7 @@ define i1 @fp80_to_sint1(x86_fp80 %x) #0 {
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fistps -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; X64-NEXT:    retq
   %result = call i1 @llvm.experimental.constrained.fptosi.i1.f80(x86_fp80 %x,
                                                metadata !"fpexcept.strict") #0
@@ -279,7 +279,7 @@ define i8 @fp80_to_sint8(x86_fp80 %x) #0 {
 ; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X86-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
@@ -295,7 +295,7 @@ define i8 @fp80_to_sint8(x86_fp80 %x) #0 {
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fistps -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; X64-NEXT:    retq
   %result = call i8 @llvm.experimental.constrained.fptosi.i8.f80(x86_fp80 %x,
                                                metadata !"fpexcept.strict") #0
@@ -435,7 +435,7 @@ define i1 @fp80_to_uint1(x86_fp80 %x) #0 {
 ; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X86-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
@@ -451,7 +451,7 @@ define i1 @fp80_to_uint1(x86_fp80 %x) #0 {
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fistps -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; X64-NEXT:    retq
   %result = call i1 @llvm.experimental.constrained.fptoui.i1.f80(x86_fp80 %x,
                                                metadata !"fpexcept.strict") #0
@@ -472,7 +472,7 @@ define i8 @fp80_to_uint8(x86_fp80 %x) #0 {
 ; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
 ; X86-NEXT:    fistps {{[0-9]+}}(%esp)
 ; X86-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addl $8, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
@@ -488,7 +488,7 @@ define i8 @fp80_to_uint8(x86_fp80 %x) #0 {
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fistps -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; X64-NEXT:    retq
   %result = call i8 @llvm.experimental.constrained.fptoui.i8.f80(x86_fp80 %x,
                                                metadata !"fpexcept.strict") #0
@@ -655,7 +655,7 @@ define x86_fp80 @sint1_to_fp80(i1 %x) #0 {
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $1, %al
 ; X86-NEXT:    negb %al
 ; X86-NEXT:    movsbl %al, %eax
@@ -781,7 +781,7 @@ define x86_fp80 @uint1_to_fp80(i1 %x) #0 {
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $1, %al
 ; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)

diff  --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
index 5a60a9e00aa3f..8ace836987319 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
@@ -42,7 +42,7 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movb $-1, %dl
 ; X86-X87-NEXT:    jb .LBB0_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-X87-NEXT:  .LBB0_2:
 ; X86-X87-NEXT:    fldz
 ; X86-X87-NEXT:    fxch %st(1)
@@ -115,7 +115,7 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
 ; X86-X87-NEXT:    movb $-128, %dl
 ; X86-X87-NEXT:    jb .LBB1_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-X87-NEXT:  .LBB1_2:
 ; X86-X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    fxch %st(1)
@@ -1062,7 +1062,7 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movb $-1, %dl
 ; X86-X87-NEXT:    jb .LBB10_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-X87-NEXT:  .LBB10_2:
 ; X86-X87-NEXT:    fldz
 ; X86-X87-NEXT:    fxch %st(1)
@@ -1135,7 +1135,7 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
 ; X86-X87-NEXT:    movb $-128, %dl
 ; X86-X87-NEXT:    jb .LBB11_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-X87-NEXT:  .LBB11_2:
 ; X86-X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    fxch %st(1)
@@ -2079,7 +2079,7 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movb $-1, %dl
 ; X86-X87-NEXT:    jb .LBB20_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-X87-NEXT:  .LBB20_2:
 ; X86-X87-NEXT:    fldz
 ; X86-X87-NEXT:    fxch %st(1)
@@ -2176,7 +2176,7 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
 ; X86-X87-NEXT:    movb $-128, %dl
 ; X86-X87-NEXT:    jb .LBB21_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-X87-NEXT:  .LBB21_2:
 ; X86-X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    fxch %st(1)
@@ -3274,7 +3274,7 @@ define i1 @test_signed_i1_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movb $-1, %dl
 ; X86-X87-NEXT:    jb .LBB30_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-X87-NEXT:  .LBB30_2:
 ; X86-X87-NEXT:    fldz
 ; X86-X87-NEXT:    fxch %st(1)
@@ -3387,7 +3387,7 @@ define i8 @test_signed_i8_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    movb $-128, %dl
 ; X86-X87-NEXT:    jb .LBB31_2
 ; X86-X87-NEXT:  # %bb.1:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-X87-NEXT:  .LBB31_2:
 ; X86-X87-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-X87-NEXT:    fxch %st(1)

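Note: the saturating tests differ only on the in-range path, where the converted byte is reloaded from the stack (the toggled movzbl/movb); out-of-range inputs take the constant branches checked above. The intrinsic under test, in sketch form (hypothetical name):

  declare i8 @llvm.fptosi.sat.i8.f32(float)

  define i8 @fptosi_sat_sketch(float %f) nounwind {
    %r = call i8 @llvm.fptosi.sat.i8.f32(float %f)  ; clamps to [-128, 127]
    ret i8 %r
  }
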
diff  --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
index 01426b1ac91c2..5fbf26c9d166a 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
@@ -39,7 +39,7 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    jb .LBB0_1
 ; X86-X87-NEXT:  # %bb.2:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-X87-NEXT:    jmp .LBB0_3
 ; X86-X87-NEXT:  .LBB0_1:
 ; X86-X87-NEXT:    xorl %ecx, %ecx
@@ -102,7 +102,7 @@ define i8 @test_unsigned_i8_f32(float %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    jb .LBB1_1
 ; X86-X87-NEXT:  # %bb.2:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-X87-NEXT:    jmp .LBB1_3
 ; X86-X87-NEXT:  .LBB1_1:
 ; X86-X87-NEXT:    xorl %ecx, %ecx
@@ -974,7 +974,7 @@ define i1 @test_unsigned_i1_f64(double %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    jb .LBB10_1
 ; X86-X87-NEXT:  # %bb.2:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-X87-NEXT:    jmp .LBB10_3
 ; X86-X87-NEXT:  .LBB10_1:
 ; X86-X87-NEXT:    xorl %ecx, %ecx
@@ -1037,7 +1037,7 @@ define i8 @test_unsigned_i8_f64(double %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    jb .LBB11_1
 ; X86-X87-NEXT:  # %bb.2:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-X87-NEXT:    jmp .LBB11_3
 ; X86-X87-NEXT:  .LBB11_1:
 ; X86-X87-NEXT:    xorl %ecx, %ecx
@@ -1900,7 +1900,7 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    jb .LBB20_1
 ; X86-X87-NEXT:  # %bb.2:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-X87-NEXT:    jmp .LBB20_3
 ; X86-X87-NEXT:  .LBB20_1:
 ; X86-X87-NEXT:    xorl %ecx, %ecx
@@ -1982,7 +1982,7 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    jb .LBB21_1
 ; X86-X87-NEXT:  # %bb.2:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-X87-NEXT:    jmp .LBB21_3
 ; X86-X87-NEXT:  .LBB21_1:
 ; X86-X87-NEXT:    xorl %ecx, %ecx
@@ -2995,7 +2995,7 @@ define i1 @test_unsigned_i1_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    jb .LBB30_1
 ; X86-X87-NEXT:  # %bb.2:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-X87-NEXT:    jmp .LBB30_3
 ; X86-X87-NEXT:  .LBB30_1:
 ; X86-X87-NEXT:    xorl %ecx, %ecx
@@ -3092,7 +3092,7 @@ define i8 @test_unsigned_i8_f80(x86_fp80 %f) nounwind {
 ; X86-X87-NEXT:    sahf
 ; X86-X87-NEXT:    jb .LBB31_1
 ; X86-X87-NEXT:  # %bb.2:
-; X86-X87-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-X87-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-X87-NEXT:    jmp .LBB31_3
 ; X86-X87-NEXT:  .LBB31_1:
 ; X86-X87-NEXT:    xorl %ecx, %ecx

diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index eaf794c787688..5aaced853153f 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -17,7 +17,7 @@ declare i128 @llvm.fshl.i128(i128, i128, i128) nounwind readnone
 define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
 ; X86-LABEL: var_shift_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
@@ -48,14 +48,14 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86-FAST:       # %bb.0:
 ; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-FAST-NEXT:    andb $15, %cl
 ; X86-FAST-NEXT:    shldw %cl, %dx, %ax
 ; X86-FAST-NEXT:    retl
 ;
 ; X86-SLOW-LABEL: var_shift_i16:
 ; X86-SLOW:       # %bb.0:
-; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT:    shll $16, %eax
@@ -95,7 +95,7 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i32:
 ; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-FAST-NEXT:    shldl %cl, %edx, %eax
@@ -104,7 +104,7 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-SLOW-LABEL: var_shift_i32:
 ; X86-SLOW:       # %bb.0:
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT:    shll %cl, %edx
 ; X86-SLOW-NEXT:    notb %cl
@@ -446,8 +446,8 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
 define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: const_shift_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shrb %cl
 ; X86-NEXT:    shlb $7, %al
 ; X86-NEXT:    orb %cl, %al
@@ -588,12 +588,12 @@ define i8 @combine_fshl_load_i8(ptr %p) nounwind {
 ; X86-LABEL: combine_fshl_load_i8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl 1(%eax), %eax
+; X86-NEXT:    movb 1(%eax), %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: combine_fshl_load_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl 1(%rdi), %eax
+; X64-NEXT:    movb 1(%rdi), %al
 ; X64-NEXT:    retq
   %p1 = getelementptr i8, ptr %p, i32 1
   %ld0 = load i8, ptr%p

diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index eb1f0402c6f47..b3d13a6966ba5 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -17,7 +17,7 @@ declare i128 @llvm.fshr.i128(i128, i128, i128) nounwind readnone
 define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
 ; X86-LABEL: var_shift_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $8, %eax
@@ -47,14 +47,14 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86-FAST:       # %bb.0:
 ; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-FAST-NEXT:    andb $15, %cl
 ; X86-FAST-NEXT:    shrdw %cl, %dx, %ax
 ; X86-FAST-NEXT:    retl
 ;
 ; X86-SLOW-LABEL: var_shift_i16:
 ; X86-SLOW:       # %bb.0:
-; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT:    shll $16, %eax
@@ -92,7 +92,7 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
 define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i32:
 ; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
@@ -101,7 +101,7 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-SLOW-LABEL: var_shift_i32:
 ; X86-SLOW:       # %bb.0:
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT:    shrl %cl, %edx
 ; X86-SLOW-NEXT:    notb %cl
@@ -436,8 +436,8 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
 define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: const_shift_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shrb $7, %cl
 ; X86-NEXT:    addb %al, %al
 ; X86-NEXT:    orb %cl, %al
@@ -577,12 +577,12 @@ define i8 @combine_fshr_load_i8(ptr %p) nounwind {
 ; X86-LABEL: combine_fshr_load_i8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: combine_fshr_load_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    movb (%rdi), %al
 ; X64-NEXT:    retq
   %p1 = getelementptr i8, ptr %p, i32 1
   %ld0 = load i8, ptr%p

diff --git a/llvm/test/CodeGen/X86/funnel-shift-rot.ll b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
index 79870de6a2589..ef287b959427b 100644
--- a/llvm/test/CodeGen/X86/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
@@ -19,7 +19,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 define i8 @rotl_i8_const_shift(i8 %x) nounwind {
 ; X86-SSE2-LABEL: rotl_i8_const_shift:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-SSE2-NEXT:    rolb $3, %al
 ; X86-SSE2-NEXT:    retl
 ;
@@ -36,7 +36,7 @@ define i8 @rotl_i8_const_shift(i8 %x) nounwind {
 define i8 @rotl_i8_const_shift1(i8 %x) nounwind {
 ; X86-SSE2-LABEL: rotl_i8_const_shift1:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-SSE2-NEXT:    rolb %al
 ; X86-SSE2-NEXT:    retl
 ;
@@ -53,7 +53,7 @@ define i8 @rotl_i8_const_shift1(i8 %x) nounwind {
 define i8 @rotl_i8_const_shift7(i8 %x) nounwind {
 ; X86-SSE2-LABEL: rotl_i8_const_shift7:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-SSE2-NEXT:    rorb %al
 ; X86-SSE2-NEXT:    retl
 ;
@@ -89,7 +89,7 @@ define i64 @rotl_i64_const_shift(i64 %x) nounwind {
 define i16 @rotl_i16(i16 %x, i16 %z) nounwind {
 ; X86-SSE2-LABEL: rotl_i16:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    rolw %cl, %ax
 ; X86-SSE2-NEXT:    retl
@@ -109,7 +109,7 @@ define i16 @rotl_i16(i16 %x, i16 %z) nounwind {
 define i32 @rotl_i32(i32 %x, i32 %z) nounwind {
 ; X86-SSE2-LABEL: rotl_i32:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    roll %cl, %eax
 ; X86-SSE2-NEXT:    retl
@@ -187,7 +187,7 @@ define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) nounwind {
 define i8 @rotr_i8_const_shift(i8 %x) nounwind {
 ; X86-SSE2-LABEL: rotr_i8_const_shift:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-SSE2-NEXT:    rorb $3, %al
 ; X86-SSE2-NEXT:    retl
 ;
@@ -204,7 +204,7 @@ define i8 @rotr_i8_const_shift(i8 %x) nounwind {
 define i8 @rotr_i8_const_shift1(i8 %x) nounwind {
 ; X86-SSE2-LABEL: rotr_i8_const_shift1:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-SSE2-NEXT:    rorb %al
 ; X86-SSE2-NEXT:    retl
 ;
@@ -221,7 +221,7 @@ define i8 @rotr_i8_const_shift1(i8 %x) nounwind {
 define i8 @rotr_i8_const_shift7(i8 %x) nounwind {
 ; X86-SSE2-LABEL: rotr_i8_const_shift7:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-SSE2-NEXT:    rolb %al
 ; X86-SSE2-NEXT:    retl
 ;
@@ -256,7 +256,7 @@ define i32 @rotr_i32_const_shift(i32 %x) nounwind {
 define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
 ; X86-SSE2-LABEL: rotr_i16:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    rorw %cl, %ax
 ; X86-SSE2-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 404587437f5f3..a2dab38acb9a4 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -20,7 +20,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-SSE2-LABEL: fshl_i32:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
@@ -282,7 +282,7 @@ define i8 @fshl_i8_const_fold() nounwind {
 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-SSE2-LABEL: fshr_i32:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
@@ -419,7 +419,7 @@ define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
 define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshl_i32_undef0:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
@@ -475,7 +475,7 @@ define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
 define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshl_i32_undef1:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
@@ -495,7 +495,7 @@ define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshl_i32_undef1_msk:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    andb $7, %cl
 ; X86-SSE2-NEXT:    shll %cl, %eax
 ; X86-SSE2-NEXT:    retl
@@ -549,7 +549,7 @@ define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
 define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshr_i32_undef0:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
@@ -569,7 +569,7 @@ define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshr_i32_undef0_msk:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    andb $7, %cl
 ; X86-SSE2-NEXT:    shrl %cl, %eax
 ; X86-SSE2-NEXT:    retl
@@ -606,7 +606,7 @@ define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
 define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshr_i32_undef1:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
@@ -681,7 +681,7 @@ define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
 define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshl_i32_zero0:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    xorl %eax, %eax
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
@@ -717,7 +717,7 @@ define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
 define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshl_i32_zero1:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    xorl %edx, %edx
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
@@ -754,7 +754,7 @@ define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
 define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshr_i32_zero0:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    xorl %edx, %edx
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
@@ -791,7 +791,7 @@ define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
 define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshr_i32_zero1:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    xorl %eax, %eax
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
@@ -1047,7 +1047,7 @@ define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl %edx, %esi
 ; X86-SSE2-NEXT:    shll %cl, %esi
@@ -1075,7 +1075,7 @@ define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-LABEL: or_shl_rotl:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    shll %cl, %edx
 ; X86-SSE2-NEXT:    roll %cl, %eax
@@ -1102,7 +1102,7 @@ define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl %edx, %esi
 ; X86-SSE2-NEXT:    shll %cl, %esi
@@ -1130,7 +1130,7 @@ define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-LABEL: or_shl_rotl_commute:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    shll %cl, %edx
 ; X86-SSE2-NEXT:    roll %cl, %eax
@@ -1157,7 +1157,7 @@ define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl %edx, %esi
 ; X86-SSE2-NEXT:    shrl %cl, %esi
@@ -1185,7 +1185,7 @@ define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-LABEL: or_lshr_rotr:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    shrl %cl, %edx
 ; X86-SSE2-NEXT:    rorl %cl, %eax
@@ -1212,7 +1212,7 @@ define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl %edx, %esi
 ; X86-SSE2-NEXT:    shrl %cl, %esi
@@ -1240,7 +1240,7 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-LABEL: or_lshr_rotr_commute:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    shrl %cl, %edx
 ; X86-SSE2-NEXT:    rorl %cl, %eax
@@ -1265,7 +1265,7 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
 define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-LABEL: or_shl_fshl_simplify:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
@@ -1287,7 +1287,7 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
 define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-LABEL: or_lshr_fshr_simplify:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax

diff --git a/llvm/test/CodeGen/X86/gpr-to-mask.ll b/llvm/test/CodeGen/X86/gpr-to-mask.ll
index 3493210cff336..248aa3a77d2b2 100644
--- a/llvm/test/CodeGen/X86/gpr-to-mask.ll
+++ b/llvm/test/CodeGen/X86/gpr-to-mask.ll
@@ -105,11 +105,11 @@ define void @test_load_add(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f
 ; X86-64-NEXT:    testb $1, %dil
 ; X86-64-NEXT:    je .LBB2_2
 ; X86-64-NEXT:  # %bb.1: # %if
-; X86-64-NEXT:    movzbl (%rdx), %eax
+; X86-64-NEXT:    movb (%rdx), %al
 ; X86-64-NEXT:    addb (%rcx), %al
 ; X86-64-NEXT:    jmp .LBB2_3
 ; X86-64-NEXT:  .LBB2_2: # %else
-; X86-64-NEXT:    movzbl (%rcx), %eax
+; X86-64-NEXT:    movb (%rcx), %al
 ; X86-64-NEXT:  .LBB2_3: # %exit
 ; X86-64-NEXT:    kmovd %eax, %k1
 ; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
@@ -126,11 +126,11 @@ define void @test_load_add(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f
 ; X86-32-NEXT:    je .LBB2_2
 ; X86-32-NEXT:  # %bb.1: # %if
 ; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-32-NEXT:    movzbl (%edx), %edx
+; X86-32-NEXT:    movb (%edx), %dl
 ; X86-32-NEXT:    addb (%ecx), %dl
 ; X86-32-NEXT:    jmp .LBB2_3
 ; X86-32-NEXT:  .LBB2_2: # %else
-; X86-32-NEXT:    movzbl (%ecx), %edx
+; X86-32-NEXT:    movb (%ecx), %dl
 ; X86-32-NEXT:  .LBB2_3: # %exit
 ; X86-32-NEXT:    kmovd %edx, %k1
 ; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
@@ -212,10 +212,10 @@ define void @test_loadi1_storei1(i1 %cond, ptr %iptr1, ptr %iptr2, ptr %iptr3)
 ; X86-64-NEXT:    testb $1, %dil
 ; X86-64-NEXT:    je .LBB4_2
 ; X86-64-NEXT:  # %bb.1: # %if
-; X86-64-NEXT:    movzbl (%rsi), %eax
+; X86-64-NEXT:    movb (%rsi), %al
 ; X86-64-NEXT:    jmp .LBB4_3
 ; X86-64-NEXT:  .LBB4_2: # %else
-; X86-64-NEXT:    movzbl (%rdx), %eax
+; X86-64-NEXT:    movb (%rdx), %al
 ; X86-64-NEXT:  .LBB4_3: # %exit
 ; X86-64-NEXT:    andb $1, %al
 ; X86-64-NEXT:    movb %al, (%rcx)
@@ -232,7 +232,7 @@ define void @test_loadi1_storei1(i1 %cond, ptr %iptr1, ptr %iptr2, ptr %iptr3)
 ; X86-32-NEXT:  .LBB4_2: # %else
 ; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-32-NEXT:  .LBB4_3: # %exit
-; X86-32-NEXT:    movzbl (%ecx), %ecx
+; X86-32-NEXT:    movb (%ecx), %cl
 ; X86-32-NEXT:    andb $1, %cl
 ; X86-32-NEXT:    movb %cl, (%eax)
 ; X86-32-NEXT:    retl
@@ -320,11 +320,11 @@ define void @test_shr1(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x
 ; X86-64-NEXT:    testb $1, %dil
 ; X86-64-NEXT:    je .LBB6_2
 ; X86-64-NEXT:  # %bb.1: # %if
-; X86-64-NEXT:    movzbl (%rsi), %eax
+; X86-64-NEXT:    movb (%rsi), %al
 ; X86-64-NEXT:    shrb %al
 ; X86-64-NEXT:    jmp .LBB6_3
 ; X86-64-NEXT:  .LBB6_2: # %else
-; X86-64-NEXT:    movzbl (%rdx), %eax
+; X86-64-NEXT:    movb (%rdx), %al
 ; X86-64-NEXT:  .LBB6_3: # %exit
 ; X86-64-NEXT:    kmovd %eax, %k1
 ; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
@@ -341,12 +341,12 @@ define void @test_shr1(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x
 ; X86-32-NEXT:    je .LBB6_2
 ; X86-32-NEXT:  # %bb.1: # %if
 ; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-32-NEXT:    movzbl (%ecx), %ecx
+; X86-32-NEXT:    movb (%ecx), %cl
 ; X86-32-NEXT:    shrb %cl
 ; X86-32-NEXT:    jmp .LBB6_3
 ; X86-32-NEXT:  .LBB6_2: # %else
 ; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-32-NEXT:    movzbl (%ecx), %ecx
+; X86-32-NEXT:    movb (%ecx), %cl
 ; X86-32-NEXT:  .LBB6_3: # %exit
 ; X86-32-NEXT:    kmovd %ecx, %k1
 ; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}

diff --git a/llvm/test/CodeGen/X86/h-register-addressing-32.ll b/llvm/test/CodeGen/X86/h-register-addressing-32.ll
index eee1fd6d522ce..f485387994ce6 100644
--- a/llvm/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/llvm/test/CodeGen/X86/h-register-addressing-32.ll
@@ -46,7 +46,7 @@ define i8 @foo1(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly {
 ; CHECK-LABEL: foo1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movzbl %dh, %ecx
-; CHECK-NEXT:    movzbl (%eax,%ecx), %eax
+; CHECK-NEXT:    movb (%eax,%ecx), %al
 ; CHECK-NEXT:    retl
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
@@ -59,7 +59,7 @@ define i8 @bar8(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly {
 ; CHECK-LABEL: bar8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movzbl %dh, %ecx
-; CHECK-NEXT:    movzbl (%eax,%ecx,8), %eax
+; CHECK-NEXT:    movb (%eax,%ecx,8), %al
 ; CHECK-NEXT:    retl
   %t0 = lshr i32 %x, 5
   %t1 = and i32 %t0, 2040
@@ -72,7 +72,7 @@ define i8 @bar4(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly {
 ; CHECK-LABEL: bar4:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movzbl %dh, %ecx
-; CHECK-NEXT:    movzbl (%eax,%ecx,4), %eax
+; CHECK-NEXT:    movb (%eax,%ecx,4), %al
 ; CHECK-NEXT:    retl
   %t0 = lshr i32 %x, 6
   %t1 = and i32 %t0, 1020
@@ -85,7 +85,7 @@ define i8 @bar2(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly {
 ; CHECK-LABEL: bar2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movzbl %dh, %ecx
-; CHECK-NEXT:    movzbl (%eax,%ecx,2), %eax
+; CHECK-NEXT:    movb (%eax,%ecx,2), %al
 ; CHECK-NEXT:    retl
   %t0 = lshr i32 %x, 7
   %t1 = and i32 %t0, 510

diff --git a/llvm/test/CodeGen/X86/h-register-addressing-64.ll b/llvm/test/CodeGen/X86/h-register-addressing-64.ll
index 5136cc84c81f6..cfe5d26f2d606 100644
--- a/llvm/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/llvm/test/CodeGen/X86/h-register-addressing-64.ll
@@ -50,7 +50,7 @@ define i8 @foo1(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movzbl %ah, %eax
-; CHECK-NEXT:    movzbl (%rdi,%rax), %eax
+; CHECK-NEXT:    movb (%rdi,%rax), %al
 ; CHECK-NEXT:    retq
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
@@ -64,7 +64,7 @@ define i8 @bar8(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movzbl %ah, %eax
-; CHECK-NEXT:    movzbl (%rdi,%rax,8), %eax
+; CHECK-NEXT:    movb (%rdi,%rax,8), %al
 ; CHECK-NEXT:    retq
   %t0 = lshr i64 %x, 5
   %t1 = and i64 %t0, 2040
@@ -78,7 +78,7 @@ define i8 @bar4(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movzbl %ah, %eax
-; CHECK-NEXT:    movzbl (%rdi,%rax,4), %eax
+; CHECK-NEXT:    movb (%rdi,%rax,4), %al
 ; CHECK-NEXT:    retq
   %t0 = lshr i64 %x, 6
   %t1 = and i64 %t0, 1020
@@ -92,7 +92,7 @@ define i8 @bar2(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    movzbl %ah, %eax
-; CHECK-NEXT:    movzbl (%rdi,%rax,2), %eax
+; CHECK-NEXT:    movb (%rdi,%rax,2), %al
 ; CHECK-NEXT:    retq
   %t0 = lshr i64 %x, 7
   %t1 = and i64 %t0, 510

diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index 7897a20403d06..8ebc2640a8d69 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -22,8 +22,8 @@
 define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_signbit_eq:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    testb $-128, %al
 ; X86-NEXT:    sete %al
@@ -46,8 +46,8 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_lowestbit_eq:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    testb $1, %al
 ; X86-NEXT:    sete %al
@@ -70,8 +70,8 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_bitsinmiddle_eq:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    testb $24, %al
 ; X86-NEXT:    sete %al
@@ -96,7 +96,7 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i16_signbit_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testl $32768, %eax # imm = 0x8000
@@ -105,7 +105,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i16_signbit_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    testl $32768, %eax # imm = 0x8000
 ; X86-BMI2-NEXT:    sete %al
@@ -135,7 +135,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i16_lowestbit_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $1, %al
@@ -144,7 +144,7 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i16_lowestbit_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    testb $1, %al
 ; X86-BMI2-NEXT:    sete %al
@@ -174,7 +174,7 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testl $4080, %eax # imm = 0xFF0
@@ -183,7 +183,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    testl $4080, %eax # imm = 0xFF0
 ; X86-BMI2-NEXT:    sete %al
@@ -215,7 +215,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i32_signbit_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
@@ -224,7 +224,7 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i32_signbit_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
 ; X86-BMI2-NEXT:    sete %al
@@ -254,7 +254,7 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i32_lowestbit_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $1, %al
@@ -263,7 +263,7 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i32_lowestbit_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    testb $1, %al
 ; X86-BMI2-NEXT:    sete %al
@@ -293,7 +293,7 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
 define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testl $16776960, %eax # imm = 0xFFFF00
@@ -302,7 +302,7 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    testl $16776960, %eax # imm = 0xFFFF00
 ; X86-BMI2-NEXT:    sete %al
@@ -335,7 +335,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i64_signbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl %eax, %esi
@@ -350,7 +350,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i64_signbit_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -385,7 +385,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
@@ -397,7 +397,7 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    testb $32, %al
@@ -431,7 +431,7 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl %eax, %esi
@@ -451,7 +451,7 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -772,8 +772,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_signbit_ne:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    shrb $7, %al
 ; X86-NEXT:    retl
@@ -820,7 +820,7 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i32_x_is_const2_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    testl %eax, %eax
@@ -829,7 +829,7 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i32_x_is_const2_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl $1, %ecx
 ; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    testl %eax, %eax
@@ -877,7 +877,7 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movb $-128, %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al

diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index ddd6f002a0992..63d4e93eeb307 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -22,8 +22,8 @@
 define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_signbit_eq:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    testb $-128, %al
 ; X86-NEXT:    sete %al
@@ -66,8 +66,8 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_bitsinmiddle_eq:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    testb $24, %al
 ; X86-NEXT:    sete %al
@@ -92,7 +92,7 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i16_signbit_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    testl $32768, %eax # imm = 0x8000
@@ -102,7 +102,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI2-LABEL: scalar_i16_signbit_eq:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    testl $32768, %eax # imm = 0x8000
 ; X86-BMI2-NEXT:    sete %al
@@ -154,7 +154,7 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    testl $4080, %eax # imm = 0xFF0
@@ -164,7 +164,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    testl $4080, %eax # imm = 0xFF0
 ; X86-BMI2-NEXT:    sete %al
@@ -198,7 +198,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i32_signbit_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
@@ -207,7 +207,7 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i32_signbit_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
 ; X86-BMI2-NEXT:    sete %al
@@ -257,7 +257,7 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
 define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    testl $16776960, %eax # imm = 0xFFFF00
@@ -266,7 +266,7 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    testl $16776960, %eax # imm = 0xFFFF00
 ; X86-BMI2-NEXT:    sete %al
@@ -298,7 +298,7 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i64_signbit_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
@@ -310,7 +310,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i64_signbit_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    testb $32, %al
@@ -344,7 +344,7 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $1, %eax
 ; X86-BMI1-NEXT:    xorl %esi, %esi
 ; X86-BMI1-NEXT:    xorl %edx, %edx
@@ -363,7 +363,7 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $1, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    xorl %esi, %esi
@@ -394,7 +394,7 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1-NEXT:    movl %edx, %esi
@@ -414,7 +414,7 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -709,8 +709,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_signbit_ne:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb %cl, %al
 ; X86-NEXT:    shrb $7, %al
 ; X86-NEXT:    retl
@@ -737,7 +737,7 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 ; X86-BMI1-LABEL: scalar_i32_x_is_const_eq:
 ; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $1, %al
@@ -746,7 +746,7 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 ;
 ; X86-BMI2-LABEL: scalar_i32_x_is_const_eq:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    movl $-1437226411, %ecx # imm = 0xAA55AA55
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    testb $1, %al
@@ -803,7 +803,7 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: negative_scalar_i8_bitsinmiddle_slt:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movb $24, %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
@@ -828,7 +828,7 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movb $-128, %al
 ; X86-NEXT:    shlb %cl, %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al

diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll
index 55c318e87a5a0..c3cb3b30aa060 100644
--- a/llvm/test/CodeGen/X86/iabs.ll
+++ b/llvm/test/CodeGen/X86/iabs.ll
@@ -12,7 +12,7 @@
 define i8 @test_i8(i8 %a) nounwind {
 ; X86-LABEL: test_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    sarb $7, %cl
 ; X86-NEXT:    xorb %cl, %al

diff --git a/llvm/test/CodeGen/X86/inc-of-add.ll b/llvm/test/CodeGen/X86/inc-of-add.ll
index a899660031d45..386e04f421e92 100644
--- a/llvm/test/CodeGen/X86/inc-of-add.ll
+++ b/llvm/test/CodeGen/X86/inc-of-add.ll
@@ -12,7 +12,7 @@
 define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    incb %al
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
index 88c5c2730ec9c..a30cbafbc2085 100644
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -638,7 +638,7 @@ define <32 x i8> @load_i8_v32i8_undef(ptr %p, i32 %y) nounwind {
 ; SSE-LABEL: load_i8_v32i8_undef:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    andl $31, %esi
 ; SSE-NEXT:    movb %al, -40(%rsp,%rsi)
 ; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
@@ -865,7 +865,7 @@ define <16 x i8> @arg_i8_v16i8(<16 x i8> %v, i8 %x, i32 %y) nounwind {
 ; X86AVX2-NEXT:    subl $32, %esp
 ; X86AVX2-NEXT:    movl 12(%ebp), %eax
 ; X86AVX2-NEXT:    andl $15, %eax
-; X86AVX2-NEXT:    movzbl 8(%ebp), %ecx
+; X86AVX2-NEXT:    movb 8(%ebp), %cl
 ; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
 ; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
 ; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
@@ -1160,7 +1160,7 @@ define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind {
 ; SSE-LABEL: load_i8_v16i8:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT:    andl $15, %esi
 ; SSE-NEXT:    movb %al, -24(%rsp,%rsi)
@@ -1170,7 +1170,7 @@ define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind {
 ; AVX1OR2-LABEL: load_i8_v16i8:
 ; AVX1OR2:       # %bb.0:
 ; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
-; AVX1OR2-NEXT:    movzbl (%rdi), %eax
+; AVX1OR2-NEXT:    movb (%rdi), %al
 ; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX1OR2-NEXT:    andl $15, %esi
 ; AVX1OR2-NEXT:    movb %al, -24(%rsp,%rsi)
@@ -1180,7 +1180,7 @@ define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind {
 ; AVX512F-LABEL: load_i8_v16i8:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
-; AVX512F-NEXT:    movzbl (%rdi), %eax
+; AVX512F-NEXT:    movb (%rdi), %al
 ; AVX512F-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX512F-NEXT:    andl $15, %esi
 ; AVX512F-NEXT:    movb %al, -24(%rsp,%rsi)
@@ -1203,7 +1203,7 @@ define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind {
 ; X86AVX2-NEXT:    movl 12(%ebp), %eax
 ; X86AVX2-NEXT:    andl $15, %eax
 ; X86AVX2-NEXT:    movl 8(%ebp), %ecx
-; X86AVX2-NEXT:    movzbl (%ecx), %ecx
+; X86AVX2-NEXT:    movb (%ecx), %cl
 ; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
 ; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
 ; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
@@ -1572,7 +1572,7 @@ define <32 x i8> @arg_i8_v32i8(<32 x i8> %v, i8 %x, i32 %y) nounwind {
 ; X86AVX2-NEXT:    subl $64, %esp
 ; X86AVX2-NEXT:    movl 12(%ebp), %eax
 ; X86AVX2-NEXT:    andl $31, %eax
-; X86AVX2-NEXT:    movzbl 8(%ebp), %ecx
+; X86AVX2-NEXT:    movb 8(%ebp), %cl
 ; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
 ; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
 ; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
@@ -1884,7 +1884,7 @@ define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind {
 ; SSE-LABEL: load_i8_v32i8:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT:    andl $31, %esi
@@ -1900,7 +1900,7 @@ define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind {
 ; AVX1OR2-NEXT:    andq $-32, %rsp
 ; AVX1OR2-NEXT:    subq $64, %rsp
 ; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
-; AVX1OR2-NEXT:    movzbl (%rdi), %eax
+; AVX1OR2-NEXT:    movb (%rdi), %al
 ; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
 ; AVX1OR2-NEXT:    andl $31, %esi
 ; AVX1OR2-NEXT:    movb %al, (%rsp,%rsi)
@@ -1916,7 +1916,7 @@ define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind {
 ; AVX512F-NEXT:    andq $-32, %rsp
 ; AVX512F-NEXT:    subq $64, %rsp
 ; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
-; AVX512F-NEXT:    movzbl (%rdi), %eax
+; AVX512F-NEXT:    movb (%rdi), %al
 ; AVX512F-NEXT:    vmovaps %ymm0, (%rsp)
 ; AVX512F-NEXT:    andl $31, %esi
 ; AVX512F-NEXT:    movb %al, (%rsp,%rsi)
@@ -1941,7 +1941,7 @@ define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind {
 ; X86AVX2-NEXT:    movl 12(%ebp), %eax
 ; X86AVX2-NEXT:    andl $31, %eax
 ; X86AVX2-NEXT:    movl 8(%ebp), %ecx
-; X86AVX2-NEXT:    movzbl (%ecx), %ecx
+; X86AVX2-NEXT:    movb (%ecx), %cl
 ; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
 ; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
 ; X86AVX2-NEXT:    vmovaps (%esp), %ymm0

diff --git a/llvm/test/CodeGen/X86/isel-sink2.ll b/llvm/test/CodeGen/X86/isel-sink2.ll
index 46ff70a746434..b82f87a274fe6 100644
--- a/llvm/test/CodeGen/X86/isel-sink2.ll
+++ b/llvm/test/CodeGen/X86/isel-sink2.ll
@@ -8,7 +8,7 @@ define i8 @test(ptr%P) nounwind {
 ; CHECK-NEXT:    cmpb $0, 4(%eax)
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %F
-; CHECK-NEXT:    movzbl 7(%eax), %eax
+; CHECK-NEXT:    movb 7(%eax), %al
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:  .LBB0_1: # %TB
 ; CHECK-NEXT:    movb $4, %al

diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll
index 57643e3413520..a8700ce4f161d 100644
--- a/llvm/test/CodeGen/X86/legalize-shift-64.ll
+++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll
@@ -5,7 +5,7 @@ define i64 @test1(i32 %xx, i32 %test) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK-NEXT:    andb $7, %cl
 ; CHECK-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    shll %cl, %eax
@@ -26,7 +26,7 @@ define i64 @test2(i64 %xx, i32 %test) nounwind {
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK-NEXT:    andb $7, %cl
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    shll %cl, %eax
@@ -44,7 +44,7 @@ define i64 @test3(i64 %xx, i32 %test) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK-NEXT:    andb $7, %cl
 ; CHECK-NEXT:    shrdl %cl, %edx, %eax
 ; CHECK-NEXT:    shrl %cl, %edx
@@ -60,7 +60,7 @@ define i64 @test4(i64 %xx, i32 %test) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK-NEXT:    andb $7, %cl
 ; CHECK-NEXT:    shrdl %cl, %edx, %eax
 ; CHECK-NEXT:    sarl %cl, %edx

diff --git a/llvm/test/CodeGen/X86/lifetime-alias.ll b/llvm/test/CodeGen/X86/lifetime-alias.ll
index 3efaccba1b63d..c621391fb8c83 100644
--- a/llvm/test/CodeGen/X86/lifetime-alias.ll
+++ b/llvm/test/CodeGen/X86/lifetime-alias.ll
@@ -55,9 +55,9 @@ define i8 @main() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 {
 ; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
 ; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; CHECK-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; CHECK-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
 ; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
@@ -81,7 +81,7 @@ define i8 @main() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 {
 ; CHECK-NEXT:  .LBB0_1:
 ; CHECK-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:  .LBB0_3: # %_ZNSt3__312basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED2Ev.exit50
-; CHECK-NEXT:    movzbl 16(%rax), %eax
+; CHECK-NEXT:    movb 16(%rax), %al
 ; CHECK-NEXT:    popq %rcx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/load-local-v3i1.ll b/llvm/test/CodeGen/X86/load-local-v3i1.ll
index ae3d9cdb326d3..a0483d609e8a6 100644
--- a/llvm/test/CodeGen/X86/load-local-v3i1.ll
+++ b/llvm/test/CodeGen/X86/load-local-v3i1.ll
@@ -94,7 +94,7 @@ define void @local_load_v3i1(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr %p
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    movq %rdi, %r14
-; CHECK-NEXT:    movzbl (%rdx), %eax
+; CHECK-NEXT:    movb (%rdx), %al
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    shrb %cl
 ; CHECK-NEXT:    andb $1, %cl

diff --git a/llvm/test/CodeGen/X86/load-local-v4i5.ll b/llvm/test/CodeGen/X86/load-local-v4i5.ll
index 34100bdbc2c4e..b04373935858a 100644
--- a/llvm/test/CodeGen/X86/load-local-v4i5.ll
+++ b/llvm/test/CodeGen/X86/load-local-v4i5.ll
@@ -8,7 +8,7 @@ define void @_start() {
 ; CHECK:       # %bb.0: # %Entry
 ; CHECK-NEXT:    movl __unnamed_1(%rip), %eax
 ; CHECK-NEXT:    movl %eax, -12(%rsp)
-; CHECK-NEXT:    movzbl -9(%rsp), %ecx
+; CHECK-NEXT:    movb -9(%rsp), %cl
 ; CHECK-NEXT:    movzbl -10(%rsp), %edx
 ; CHECK-NEXT:    movzbl -11(%rsp), %esi
 ; CHECK-NEXT:    andl $31, %eax

diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
index fa0e6648b8712..98da9b1239c40 100644
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@@ -90,7 +90,7 @@ define <2 x i64> @sub_op0_constant(ptr %p) nounwind {
 define <16 x i8> @sub_op1_constant(ptr %p) nounwind {
 ; SSE-LABEL: sub_op1_constant:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    addb $-42, %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
@@ -98,7 +98,7 @@ define <16 x i8> @sub_op1_constant(ptr %p) nounwind {
 ;
 ; AVX-LABEL: sub_op1_constant:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %eax
+; AVX-NEXT:    movb (%rdi), %al
 ; AVX-NEXT:    addb $-42, %al
 ; AVX-NEXT:    movzbl %al, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
@@ -210,7 +210,7 @@ define <8 x i16> @xor_op1_constant(ptr %p) nounwind {
 define <4 x i32> @shl_op0_constant(ptr %p) nounwind {
 ; SSE-LABEL: shl_op0_constant:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %ecx
+; SSE-NEXT:    movb (%rdi), %cl
 ; SSE-NEXT:    movl $42, %eax
 ; SSE-NEXT:    shll %cl, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
@@ -218,7 +218,7 @@ define <4 x i32> @shl_op0_constant(ptr %p) nounwind {
 ;
 ; AVX-LABEL: shl_op0_constant:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %ecx
+; AVX-NEXT:    movb (%rdi), %cl
 ; AVX-NEXT:    movl $42, %eax
 ; AVX-NEXT:    shll %cl, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
@@ -232,7 +232,7 @@ define <4 x i32> @shl_op0_constant(ptr %p) nounwind {
 define <16 x i8> @shl_op1_constant(ptr %p) nounwind {
 ; SSE-LABEL: shl_op1_constant:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    shlb $5, %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
@@ -240,7 +240,7 @@ define <16 x i8> @shl_op1_constant(ptr %p) nounwind {
 ;
 ; AVX-LABEL: shl_op1_constant:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %eax
+; AVX-NEXT:    movb (%rdi), %al
 ; AVX-NEXT:    shlb $5, %al
 ; AVX-NEXT:    movzbl %al, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
@@ -254,7 +254,7 @@ define <16 x i8> @shl_op1_constant(ptr %p) nounwind {
 define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
 ; SSE-LABEL: lshr_op0_constant:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %ecx
+; SSE-NEXT:    movb (%rdi), %cl
 ; SSE-NEXT:    movl $42, %eax
 ; SSE-NEXT:    shrq %cl, %rax
 ; SSE-NEXT:    movq %rax, %xmm0
@@ -262,7 +262,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
 ;
 ; AVX-LABEL: lshr_op0_constant:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %ecx
+; AVX-NEXT:    movb (%rdi), %cl
 ; AVX-NEXT:    movl $42, %eax
 ; AVX-NEXT:    shrq %cl, %rax
 ; AVX-NEXT:    vmovq %rax, %xmm0
@@ -296,7 +296,7 @@ define <4 x i32> @lshr_op1_constant(ptr %p) nounwind {
 define <8 x i16> @ashr_op0_constant(ptr %p) nounwind {
 ; SSE-LABEL: ashr_op0_constant:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %ecx
+; SSE-NEXT:    movb (%rdi), %cl
 ; SSE-NEXT:    movl $-42, %eax
 ; SSE-NEXT:    sarl %cl, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
@@ -304,7 +304,7 @@ define <8 x i16> @ashr_op0_constant(ptr %p) nounwind {
 ;
 ; AVX-LABEL: ashr_op0_constant:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %ecx
+; AVX-NEXT:    movb (%rdi), %cl
 ; AVX-NEXT:    movl $-42, %eax
 ; AVX-NEXT:    sarl %cl, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
@@ -520,7 +520,7 @@ define <2 x i64> @urem_op0_constant(ptr %p) nounwind {
 define <16 x i8> @urem_op1_constant(ptr %p) nounwind {
 ; SSE-LABEL: urem_op1_constant:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    movl %eax, %ecx
 ; SSE-NEXT:    shrb %cl
 ; SSE-NEXT:    movzbl %cl, %ecx
@@ -534,7 +534,7 @@ define <16 x i8> @urem_op1_constant(ptr %p) nounwind {
 ;
 ; AVX-LABEL: urem_op1_constant:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %eax
+; AVX-NEXT:    movb (%rdi), %al
 ; AVX-NEXT:    movl %eax, %ecx
 ; AVX-NEXT:    shrb %cl
 ; AVX-NEXT:    movzbl %cl, %ecx

diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 3d714f31ba72e..0d6c6a06e4adf 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2519,11 +2519,11 @@ define <3 x i32> @test30(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i
 ; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; KNL_32-NEXT:    movw $-3, %ax
 ; KNL_32-NEXT:    kmovw %eax, %k0
-; KNL_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_32-NEXT:    andl $1, %eax
 ; KNL_32-NEXT:    kmovw %eax, %k1
 ; KNL_32-NEXT:    kandw %k0, %k1, %k0
-; KNL_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_32-NEXT:    kmovw %eax, %k1
 ; KNL_32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_32-NEXT:    kshiftrw $14, %k1, %k1
@@ -2531,7 +2531,7 @@ define <3 x i32> @test30(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i
 ; KNL_32-NEXT:    movw $-5, %ax
 ; KNL_32-NEXT:    kmovw %eax, %k1
 ; KNL_32-NEXT:    kandw %k1, %k0, %k0
-; KNL_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_32-NEXT:    kmovw %eax, %k1
 ; KNL_32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_32-NEXT:    kshiftrw $13, %k1, %k1
@@ -2576,12 +2576,12 @@ define <3 x i32> @test30(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    movb $-3, %al
 ; SKX_32-NEXT:    kmovw %eax, %k0
-; SKX_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; SKX_32-NEXT:    kmovw %eax, %k1
 ; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
 ; SKX_32-NEXT:    kshiftrb $7, %k1, %k1
 ; SKX_32-NEXT:    kandw %k0, %k1, %k0
-; SKX_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; SKX_32-NEXT:    kmovw %eax, %k1
 ; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
 ; SKX_32-NEXT:    kshiftrb $6, %k1, %k1
@@ -2589,7 +2589,7 @@ define <3 x i32> @test30(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i
 ; SKX_32-NEXT:    movb $-5, %al
 ; SKX_32-NEXT:    kmovw %eax, %k1
 ; SKX_32-NEXT:    kandw %k1, %k0, %k0
-; SKX_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; SKX_32-NEXT:    kmovw %eax, %k1
 ; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
 ; SKX_32-NEXT:    kshiftrb $5, %k1, %k1
@@ -2642,11 +2642,11 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32>
 ; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
 ; KNL_32-NEXT:    movw $-3, %ax
 ; KNL_32-NEXT:    kmovw %eax, %k0
-; KNL_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_32-NEXT:    andl $1, %eax
 ; KNL_32-NEXT:    kmovw %eax, %k1
 ; KNL_32-NEXT:    kandw %k0, %k1, %k0
-; KNL_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_32-NEXT:    kmovw %eax, %k1
 ; KNL_32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_32-NEXT:    kshiftrw $14, %k1, %k1
@@ -2654,7 +2654,7 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32>
 ; KNL_32-NEXT:    movw $-5, %ax
 ; KNL_32-NEXT:    kmovw %eax, %k1
 ; KNL_32-NEXT:    kandw %k1, %k0, %k0
-; KNL_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; KNL_32-NEXT:    kmovw %eax, %k1
 ; KNL_32-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL_32-NEXT:    kshiftrw $13, %k1, %k1
@@ -2697,12 +2697,12 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32>
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    movb $-3, %al
 ; SKX_32-NEXT:    kmovw %eax, %k0
-; SKX_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; SKX_32-NEXT:    kmovw %eax, %k1
 ; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
 ; SKX_32-NEXT:    kshiftrb $7, %k1, %k1
 ; SKX_32-NEXT:    kandw %k0, %k1, %k0
-; SKX_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; SKX_32-NEXT:    kmovw %eax, %k1
 ; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
 ; SKX_32-NEXT:    kshiftrb $6, %k1, %k1
@@ -2710,7 +2710,7 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32>
 ; SKX_32-NEXT:    movb $-5, %al
 ; SKX_32-NEXT:    kmovw %eax, %k1
 ; SKX_32-NEXT:    kandw %k1, %k0, %k0
-; SKX_32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; SKX_32-NEXT:    kmovw %eax, %k1
 ; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
 ; SKX_32-NEXT:    kshiftrb $5, %k1, %k1

diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index bd55b98f627c6..a93d64f4f53bb 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -176,7 +176,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl (%ecx), %edx
 ; X86-NEXT:    xorw (%eax), %dx
-; X86-NEXT:    movzbl 2(%ecx), %ecx
+; X86-NEXT:    movb 2(%ecx), %cl
 ; X86-NEXT:    xorb 2(%eax), %cl
 ; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    orw %dx, %ax
@@ -308,7 +308,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %edx
 ; X86-NEXT:    xorl (%eax), %edx
-; X86-NEXT:    movzbl 4(%ecx), %ecx
+; X86-NEXT:    movb 4(%ecx), %cl
 ; X86-NEXT:    xorb 4(%eax), %cl
 ; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    orl %edx, %eax
@@ -510,7 +510,7 @@ define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    xorl (%eax), %edx
 ; X86-NEXT:    xorl 4(%eax), %esi
 ; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    movzbl 8(%ecx), %ecx
+; X86-NEXT:    movb 8(%ecx), %cl
 ; X86-NEXT:    xorb 8(%eax), %cl
 ; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    orl %esi, %eax
@@ -641,7 +641,7 @@ define i1 @length13_eq(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    orl %esi, %eax
 ; X86-NEXT:    movl 8(%edx), %esi
 ; X86-NEXT:    xorl 8(%ecx), %esi
-; X86-NEXT:    movzbl 12(%edx), %edx
+; X86-NEXT:    movb 12(%edx), %dl
 ; X86-NEXT:    xorb 12(%ecx), %dl
 ; X86-NEXT:    movzbl %dl, %ecx
 ; X86-NEXT:    orl %esi, %ecx

diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index ae5d9e1715b31..28182ccada5e2 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -165,7 +165,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    xorw (%rsi), %ax
-; X64-NEXT:    movzbl 2(%rdi), %ecx
+; X64-NEXT:    movb 2(%rdi), %cl
 ; X64-NEXT:    xorb 2(%rsi), %cl
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    orw %ax, %cx
@@ -281,7 +281,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    xorl (%rsi), %eax
-; X64-NEXT:    movzbl 4(%rdi), %ecx
+; X64-NEXT:    movb 4(%rdi), %cl
 ; X64-NEXT:    xorb 4(%rsi), %cl
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    orl %eax, %ecx
@@ -439,7 +439,7 @@ define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
-; X64-NEXT:    movzbl 8(%rdi), %ecx
+; X64-NEXT:    movb 8(%rdi), %cl
 ; X64-NEXT:    xorb 8(%rsi), %cl
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    orq %rax, %rcx

diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
index 718db45a8bd64..132fb581bb0e0 100644
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-x32.ll
@@ -204,7 +204,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl (%ecx), %edx
 ; X86-NEXT:    xorw (%eax), %dx
-; X86-NEXT:    movzbl 2(%ecx), %ecx
+; X86-NEXT:    movb 2(%ecx), %cl
 ; X86-NEXT:    xorb 2(%eax), %cl
 ; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    orw %dx, %ax
@@ -336,7 +336,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %edx
 ; X86-NEXT:    xorl (%eax), %edx
-; X86-NEXT:    movzbl 4(%ecx), %ecx
+; X86-NEXT:    movb 4(%ecx), %cl
 ; X86-NEXT:    xorb 4(%eax), %cl
 ; X86-NEXT:    movzbl %cl, %eax
 ; X86-NEXT:    orl %edx, %eax

diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 6aac8b8a1f61d..22d72bf7620b9 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -191,7 +191,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    xorw (%rsi), %ax
-; X64-NEXT:    movzbl 2(%rdi), %ecx
+; X64-NEXT:    movb 2(%rdi), %cl
 ; X64-NEXT:    xorb 2(%rsi), %cl
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    orw %ax, %cx
@@ -307,7 +307,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    xorl (%rsi), %eax
-; X64-NEXT:    movzbl 4(%rdi), %ecx
+; X64-NEXT:    movb 4(%rdi), %cl
 ; X64-NEXT:    xorb 4(%rsi), %cl
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    orl %eax, %ecx
@@ -465,7 +465,7 @@ define i1 @length9_eq(ptr %X, ptr %Y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
-; X64-NEXT:    movzbl 8(%rdi), %ecx
+; X64-NEXT:    movb 8(%rdi), %cl
 ; X64-NEXT:    xorb 8(%rsi), %cl
 ; X64-NEXT:    movzbl %cl, %ecx
 ; X64-NEXT:    orq %rax, %rcx

diff --git a/llvm/test/CodeGen/X86/memcpy.ll b/llvm/test/CodeGen/X86/memcpy.ll
index 6ec9b20163051..cdee188062d4e 100644
--- a/llvm/test/CodeGen/X86/memcpy.ll
+++ b/llvm/test/CodeGen/X86/memcpy.ll
@@ -467,7 +467,7 @@ define void @PR15348(ptr %a, ptr %b) {
 ; unaligned loads and stores.
 ; DARWIN-LABEL: PR15348:
 ; DARWIN:       ## %bb.0:
-; DARWIN-NEXT:    movzbl 16(%rsi), %eax
+; DARWIN-NEXT:    movb 16(%rsi), %al
 ; DARWIN-NEXT:    movb %al, 16(%rdi)
 ; DARWIN-NEXT:    movq (%rsi), %rax
 ; DARWIN-NEXT:    movq 8(%rsi), %rcx
@@ -477,7 +477,7 @@ define void @PR15348(ptr %a, ptr %b) {
 ;
 ; LINUX-LABEL: PR15348:
 ; LINUX:       # %bb.0:
-; LINUX-NEXT:    movzbl 16(%rsi), %eax
+; LINUX-NEXT:    movb 16(%rsi), %al
 ; LINUX-NEXT:    movb %al, 16(%rdi)
 ; LINUX-NEXT:    movq (%rsi), %rax
 ; LINUX-NEXT:    movq 8(%rsi), %rcx
@@ -487,7 +487,7 @@ define void @PR15348(ptr %a, ptr %b) {
 ;
 ; LINUX-SKL-LABEL: PR15348:
 ; LINUX-SKL:       # %bb.0:
-; LINUX-SKL-NEXT:    movzbl 16(%rsi), %eax
+; LINUX-SKL-NEXT:    movb 16(%rsi), %al
 ; LINUX-SKL-NEXT:    movb %al, 16(%rdi)
 ; LINUX-SKL-NEXT:    vmovups (%rsi), %xmm0
 ; LINUX-SKL-NEXT:    vmovups %xmm0, (%rdi)
@@ -495,7 +495,7 @@ define void @PR15348(ptr %a, ptr %b) {
 ;
 ; LINUX-SKX-LABEL: PR15348:
 ; LINUX-SKX:       # %bb.0:
-; LINUX-SKX-NEXT:    movzbl 16(%rsi), %eax
+; LINUX-SKX-NEXT:    movb 16(%rsi), %al
 ; LINUX-SKX-NEXT:    movb %al, 16(%rdi)
 ; LINUX-SKX-NEXT:    vmovups (%rsi), %xmm0
 ; LINUX-SKX-NEXT:    vmovups %xmm0, (%rdi)
@@ -503,7 +503,7 @@ define void @PR15348(ptr %a, ptr %b) {
 ;
 ; LINUX-KNL-LABEL: PR15348:
 ; LINUX-KNL:       # %bb.0:
-; LINUX-KNL-NEXT:    movzbl 16(%rsi), %eax
+; LINUX-KNL-NEXT:    movb 16(%rsi), %al
 ; LINUX-KNL-NEXT:    movb %al, 16(%rdi)
 ; LINUX-KNL-NEXT:    vmovups (%rsi), %xmm0
 ; LINUX-KNL-NEXT:    vmovups %xmm0, (%rdi)
@@ -511,7 +511,7 @@ define void @PR15348(ptr %a, ptr %b) {
 ;
 ; LINUX-AVX512BW-LABEL: PR15348:
 ; LINUX-AVX512BW:       # %bb.0:
-; LINUX-AVX512BW-NEXT:    movzbl 16(%rsi), %eax
+; LINUX-AVX512BW-NEXT:    movb 16(%rsi), %al
 ; LINUX-AVX512BW-NEXT:    movb %al, 16(%rdi)
 ; LINUX-AVX512BW-NEXT:    vmovups (%rsi), %xmm0
 ; LINUX-AVX512BW-NEXT:    vmovups %xmm0, (%rdi)

diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 1e31ee7ad6b59..955ba1e5d9388 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -893,8 +893,8 @@ define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(ptr %ptr) nounwind uwtable noin
 ; X86-SSE1-NEXT:    movl 3(%ecx), %esi
 ; X86-SSE1-NEXT:    movl 7(%ecx), %edi
 ; X86-SSE1-NEXT:    movzwl 11(%ecx), %ebx
-; X86-SSE1-NEXT:    movzbl 13(%ecx), %edx
-; X86-SSE1-NEXT:    movzbl 15(%ecx), %ecx
+; X86-SSE1-NEXT:    movb 13(%ecx), %dl
+; X86-SSE1-NEXT:    movb 15(%ecx), %cl
 ; X86-SSE1-NEXT:    movb %dl, 13(%eax)
 ; X86-SSE1-NEXT:    movb %cl, 15(%eax)
 ; X86-SSE1-NEXT:    movw %bx, 11(%eax)
@@ -976,7 +976,7 @@ define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(ptr %ptr) nounwind uwtable noin
 ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-SSE1-NEXT:    movzwl (%ecx), %edx
-; X86-SSE1-NEXT:    movzbl 3(%ecx), %ecx
+; X86-SSE1-NEXT:    movb 3(%ecx), %cl
 ; X86-SSE1-NEXT:    movb %cl, 3(%eax)
 ; X86-SSE1-NEXT:    movw %dx, (%eax)
 ; X86-SSE1-NEXT:    movb $0, 15(%eax)

diff --git a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
index d48c59a89c3d7..67b399b36220a 100644
--- a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
+++ b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
@@ -7,10 +7,10 @@
 
 ; X86-LABEL: {{^}}merge_store_partial_overlap_load:
 ; X86-DAG: movzwl ([[BASEREG:%[a-z]+]]), %e[[LO2:[a-z]+]]
-; X86-DAG: movzbl 2([[BASEREG]]), %e[[HI1:[a-z]]]
+; X86-DAG: movb 2([[BASEREG]]), [[HI1:%[a-z]+]]
 
 ; X86-NEXT: movw %[[LO2]], 1([[BASEREG]])
-; X86-NEXT: movb %[[HI1]]l, 3([[BASEREG]])
+; X86-NEXT: movb [[HI1]], 3([[BASEREG]])
 ; X86-NEXT: retq
 
 ; DBGDAG-LABEL: Optimized legalized selection DAG: %bb.0 'merge_store_partial_overlap_load:'

diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll
index 40947c27a5eb6..f840966e71546 100644
--- a/llvm/test/CodeGen/X86/midpoint-int.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int.ll
@@ -967,8 +967,8 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
 ;
 ; X86-LABEL: scalar_i8_signed_reg_reg:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    cmpb %al, %cl
 ; X86-NEXT:    setle %dl
 ; X86-NEXT:    jg .LBB15_1
@@ -1017,8 +1017,8 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
 ;
 ; X86-LABEL: scalar_i8_unsigned_reg_reg:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    cmpb %al, %cl
 ; X86-NEXT:    setbe %dl
 ; X86-NEXT:    ja .LBB16_1
@@ -1070,9 +1070,9 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
 ;
 ; X86-LABEL: scalar_i8_signed_mem_reg:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl (%ecx), %ecx
+; X86-NEXT:    movb (%ecx), %cl
 ; X86-NEXT:    cmpb %al, %cl
 ; X86-NEXT:    setle %dl
 ; X86-NEXT:    jg .LBB17_1
@@ -1122,9 +1122,9 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
 ;
 ; X86-LABEL: scalar_i8_signed_reg_mem:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    cmpb %al, %cl
 ; X86-NEXT:    setle %dl
 ; X86-NEXT:    jg .LBB18_1
@@ -1177,8 +1177,8 @@ define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl (%ecx), %ecx
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%ecx), %cl
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    cmpb %al, %cl
 ; X86-NEXT:    setle %dl
 ; X86-NEXT:    jg .LBB19_1

diff --git a/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll b/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll
index 2e2ba29da8646..d15243024c1df 100644
--- a/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll
+++ b/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll
@@ -19,7 +19,7 @@ define void @g() #0 {
 ; CHECK-NEXT:    .cfi_offset %esi, -16
 ; CHECK-NEXT:    .cfi_offset %ebx, -12
 ; CHECK-NEXT:    movl f, %esi
-; CHECK-NEXT:    movzbl (%esi), %eax
+; CHECK-NEXT:    movb (%esi), %al
 ; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    xorl %edx, %edx

diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 9ed2d40f3b54d..8ed51fde6cbde 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4293,7 +4293,7 @@ define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) {
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
 ; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT:    andl $15, %edi
-; SKX-NEXT:    movzbl -24(%rsp,%rdi), %eax
+; SKX-NEXT:    movb -24(%rsp,%rdi), %al
 ; SKX-NEXT:    retq
   %cmp = icmp eq <16 x i8> %x, %y
   %val = extractelement <16 x i1> %cmp, i32 %z
@@ -4329,7 +4329,7 @@ define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) {
 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
 ; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; KNL-NEXT:    andl $7, %edi
-; KNL-NEXT:    movzbl -24(%rsp,%rdi,2), %eax
+; KNL-NEXT:    movb -24(%rsp,%rdi,2), %al
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -4340,7 +4340,7 @@ define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) {
 ; SKX-NEXT:    vpmovm2w %k0, %xmm0
 ; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT:    andl $7, %edi
-; SKX-NEXT:    movzbl -24(%rsp,%rdi,2), %eax
+; SKX-NEXT:    movb -24(%rsp,%rdi,2), %al
 ; SKX-NEXT:    retq
   %cmp = icmp sgt <8 x i16> %x, %y
   %val = extractelement <8 x i1> %cmp, i32 %z
@@ -4373,7 +4373,7 @@ define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) {
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; KNL-NEXT:    andl $3, %edi
-; KNL-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
+; KNL-NEXT:    movb -24(%rsp,%rdi,4), %al
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -4384,7 +4384,7 @@ define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) {
 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
 ; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT:    andl $3, %edi
-; SKX-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
+; SKX-NEXT:    movb -24(%rsp,%rdi,4), %al
 ; SKX-NEXT:    retq
   %cmp = icmp slt <4 x i32> %x, %y
   %val = extractelement <4 x i1> %cmp, i32 %z
@@ -4430,7 +4430,7 @@ define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) {
 ; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; KNL-NEXT:    andl $1, %edi
-; KNL-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
+; KNL-NEXT:    movb -24(%rsp,%rdi,8), %al
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -4441,7 +4441,7 @@ define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) {
 ; SKX-NEXT:    vpmovm2q %k0, %xmm0
 ; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
+; SKX-NEXT:    movb -24(%rsp,%rdi,8), %al
 ; SKX-NEXT:    retq
   %cmp = icmp ne <2 x i64> %x, %y
   %val = extractelement <2 x i1> %cmp, i32 %z
@@ -4477,7 +4477,7 @@ define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) {
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; KNL-NEXT:    andl $3, %edi
-; KNL-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
+; KNL-NEXT:    movb -24(%rsp,%rdi,4), %al
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -4488,7 +4488,7 @@ define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) {
 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
 ; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT:    andl $3, %edi
-; SKX-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
+; SKX-NEXT:    movb -24(%rsp,%rdi,4), %al
 ; SKX-NEXT:    retq
   %cmp = fcmp ueq <4 x float> %x, %y
   %val = extractelement <4 x i1> %cmp, i32 %z
@@ -4521,7 +4521,7 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
 ; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; KNL-NEXT:    andl $1, %edi
-; KNL-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
+; KNL-NEXT:    movb -24(%rsp,%rdi,8), %al
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -4532,7 +4532,7 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) {
 ; SKX-NEXT:    vpmovm2q %k0, %xmm0
 ; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
+; SKX-NEXT:    movb -24(%rsp,%rdi,8), %al
 ; SKX-NEXT:    retq
   %cmp = fcmp oge <2 x double> %x, %y
   %val = extractelement <2 x i1> %cmp, i32 %z

diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
index e722ab33894e2..48b32a4250b40 100644
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -82,7 +82,7 @@ define void @f_thunk(ptr %this, ...) {
 ; LINUX-NEXT:    movq %rbp, %rdx
 ; LINUX-NEXT:    movq %r13, %rcx
 ; LINUX-NEXT:    movq %r12, %r8
-; LINUX-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; LINUX-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; LINUX-NEXT:    movq %r15, %r9
 ; LINUX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; LINUX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
@@ -175,7 +175,7 @@ define void @f_thunk(ptr %this, ...) {
 ; LINUX-X32-NEXT:    movq %rbp, %rdx
 ; LINUX-X32-NEXT:    movq %r13, %rcx
 ; LINUX-X32-NEXT:    movq %r12, %r8
-; LINUX-X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; LINUX-X32-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; LINUX-X32-NEXT:    movq %r15, %r9
 ; LINUX-X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
 ; LINUX-X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload

diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll
index 961205c50d976..84f82b7a694c3 100644
--- a/llvm/test/CodeGen/X86/neg-abs.ll
+++ b/llvm/test/CodeGen/X86/neg-abs.ll
@@ -12,7 +12,7 @@ declare i128 @llvm.abs.i128(i128, i1)
 define i8 @neg_abs_i8(i8 %x) nounwind {
 ; X86-LABEL: neg_abs_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    sarb $7, %al
 ; X86-NEXT:    xorb %al, %cl
@@ -154,7 +154,7 @@ define i128 @neg_abs_i128(i128 %x) nounwind {
 define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: sub_abs_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    sarb $7, %al
 ; X86-NEXT:    xorb %al, %cl

diff --git a/llvm/test/CodeGen/X86/negate-i1.ll b/llvm/test/CodeGen/X86/negate-i1.ll
index 0ed3fa94f5b8f..2ed6c9535d0ca 100644
--- a/llvm/test/CodeGen/X86/negate-i1.ll
+++ b/llvm/test/CodeGen/X86/negate-i1.ll
@@ -13,7 +13,7 @@ define i8 @select_i8_neg1_or_0(i1 %a) {
 ;
 ; X32-LABEL: select_i8_neg1_or_0:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    negb %al
 ; X32-NEXT:    retl
@@ -31,7 +31,7 @@ define i8 @select_i8_neg1_or_0_zeroext(i1 zeroext %a) {
 ;
 ; X32-LABEL: select_i8_neg1_or_0_zeroext:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    negb %al
 ; X32-NEXT:    retl
   %b = sext i1 %a to i8

diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 0e5539449b916..8ec463a767228 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -237,7 +237,7 @@ define void @v7i8(<4 x i8> %a, <4 x i8> %b, ptr %p) nounwind {
 ; SSE2-NEXT:    pand %xmm2, %xmm1
 ; SSE2-NEXT:    pandn %xmm0, %xmm2
 ; SSE2-NEXT:    por %xmm1, %xmm2
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; SSE2-NEXT:    movb %al, 6(%rdi)
 ; SSE2-NEXT:    movd %xmm2, (%rdi)
 ; SSE2-NEXT:    pextrw $2, %xmm2, %eax

diff --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll
index faee83a988c58..495da7e88b773 100644
--- a/llvm/test/CodeGen/X86/or-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/or-with-overflow.ll
@@ -9,7 +9,7 @@
 define i8 @or_i8_ri(i8 zeroext %0, i8 zeroext %1) {
 ; X86-LABEL: or_i8_ri:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    orb $-17, %cl
 ; X86-NEXT:    je .LBB0_2
@@ -35,8 +35,8 @@ define i8 @or_i8_ri(i8 zeroext %0, i8 zeroext %1) {
 define i8 @or_i8_rr(i8 zeroext %0, i8 zeroext %1) {
 ; X86-LABEL: or_i8_rr:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    orb %al, %cl
 ; X86-NEXT:    je .LBB1_2
 ; X86-NEXT:  # %bb.1:

diff --git a/llvm/test/CodeGen/X86/packed_struct.ll b/llvm/test/CodeGen/X86/packed_struct.ll
index af9f31b717084..69e3a6e970d37 100644
--- a/llvm/test/CodeGen/X86/packed_struct.ll
+++ b/llvm/test/CodeGen/X86/packed_struct.ll
@@ -30,7 +30,7 @@ entry:
 define i8 @bar() nounwind {
 ; CHECK-LABEL: bar:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzbl bara+19, %eax
+; CHECK-NEXT:    movb bara+19, %al
 ; CHECK-NEXT:    addb bara+4, %al
 ; CHECK-NEXT:    retl
 entry:

diff --git a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
index f3d4b6221d085..a632a17e90a29 100644
--- a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
+++ b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
@@ -14,7 +14,7 @@ declare i32 @bar(i64)
 define i1 @plus_one() nounwind {
 ; CHECK32-LABEL: plus_one:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl M, %eax
+; CHECK32-NEXT:    movb M, %al
 ; CHECK32-NEXT:    incl L
 ; CHECK32-NEXT:    jne .LBB0_2
 ; CHECK32-NEXT:  # %bb.1: # %entry
@@ -29,7 +29,7 @@ define i1 @plus_one() nounwind {
 ;
 ; CHECK64-LABEL: plus_one:
 ; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    movzbl M(%rip), %eax
+; CHECK64-NEXT:    movb M(%rip), %al
 ; CHECK64-NEXT:    incl L(%rip)
 ; CHECK64-NEXT:    jne .LBB0_2
 ; CHECK64-NEXT:  # %bb.1: # %entry
@@ -62,7 +62,7 @@ exit2:
 define i1 @plus_forty_two() nounwind {
 ; CHECK32-LABEL: plus_forty_two:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl M, %eax
+; CHECK32-NEXT:    movb M, %al
 ; CHECK32-NEXT:    addl $42, L
 ; CHECK32-NEXT:    jne .LBB1_2
 ; CHECK32-NEXT:  # %bb.1: # %entry
@@ -77,7 +77,7 @@ define i1 @plus_forty_two() nounwind {
 ;
 ; CHECK64-LABEL: plus_forty_two:
 ; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    movzbl M(%rip), %eax
+; CHECK64-NEXT:    movb M(%rip), %al
 ; CHECK64-NEXT:    addl $42, L(%rip)
 ; CHECK64-NEXT:    jne .LBB1_2
 ; CHECK64-NEXT:  # %bb.1: # %entry
@@ -110,7 +110,7 @@ exit2:
 define i1 @minus_one() nounwind {
 ; CHECK32-LABEL: minus_one:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl M, %eax
+; CHECK32-NEXT:    movb M, %al
 ; CHECK32-NEXT:    decl L
 ; CHECK32-NEXT:    jne .LBB2_2
 ; CHECK32-NEXT:  # %bb.1: # %entry
@@ -125,7 +125,7 @@ define i1 @minus_one() nounwind {
 ;
 ; CHECK64-LABEL: minus_one:
 ; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    movzbl M(%rip), %eax
+; CHECK64-NEXT:    movb M(%rip), %al
 ; CHECK64-NEXT:    decl L(%rip)
 ; CHECK64-NEXT:    jne .LBB2_2
 ; CHECK64-NEXT:  # %bb.1: # %entry
@@ -158,7 +158,7 @@ exit2:
 define i1 @minus_forty_two() nounwind {
 ; CHECK32-LABEL: minus_forty_two:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl M, %eax
+; CHECK32-NEXT:    movb M, %al
 ; CHECK32-NEXT:    addl $-42, L
 ; CHECK32-NEXT:    jne .LBB3_2
 ; CHECK32-NEXT:  # %bb.1: # %entry
@@ -173,7 +173,7 @@ define i1 @minus_forty_two() nounwind {
 ;
 ; CHECK64-LABEL: minus_forty_two:
 ; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    movzbl M(%rip), %eax
+; CHECK64-NEXT:    movb M(%rip), %al
 ; CHECK64-NEXT:    addl $-42, L(%rip)
 ; CHECK64-NEXT:    jne .LBB3_2
 ; CHECK64-NEXT:  # %bb.1: # %entry

diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 78012b3d514e1..de1cb22fd402b 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -9,7 +9,7 @@
 define i8 @cnt8(i8 %x) nounwind readnone {
 ; X86-LABEL: cnt8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    shrb %al
 ; X86-NEXT:    andb $85, %al

diff --git a/llvm/test/CodeGen/X86/pr12360.ll b/llvm/test/CodeGen/X86/pr12360.ll
index e0defce3acafa..d3eae92d73b3c 100644
--- a/llvm/test/CodeGen/X86/pr12360.ll
+++ b/llvm/test/CodeGen/X86/pr12360.ll
@@ -4,7 +4,7 @@
 define zeroext i1 @f1(ptr %x) {
 ; CHECK-LABEL: f1:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movb (%rdi), %al
 ; CHECK-NEXT:    retq
 
 entry:
@@ -16,7 +16,7 @@ entry:
 define zeroext i1 @f2(ptr %x) {
 ; CHECK-LABEL: f2:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movb (%rdi), %al
 ; CHECK-NEXT:    retq
 
 entry:

diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll
index 38107284509e0..11cd6828b11b0 100644
--- a/llvm/test/CodeGen/X86/pr15267.ll
+++ b/llvm/test/CodeGen/X86/pr15267.ll
@@ -27,7 +27,7 @@ define <4 x i3> @test1(ptr %in) nounwind {
 define <4 x i1> @test2(ptr %in) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movb (%rdi), %al
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    shrb %cl
 ; CHECK-NEXT:    andb $1, %cl
@@ -53,7 +53,7 @@ define <4 x i1> @test2(ptr %in) nounwind {
 define <4 x i64> @test3(ptr %in) nounwind {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movb (%rdi), %al
 ; CHECK-NEXT:    movzbl %al, %ecx
 ; CHECK-NEXT:    shrb %al
 ; CHECK-NEXT:    movzbl %al, %eax

diff --git a/llvm/test/CodeGen/X86/pr20011.ll b/llvm/test/CodeGen/X86/pr20011.ll
index 4810226b4a756..077dc40294e5e 100644
--- a/llvm/test/CodeGen/X86/pr20011.ll
+++ b/llvm/test/CodeGen/X86/pr20011.ll
@@ -8,8 +8,8 @@ define void @crash(i64 %x0, i64 %y0, ptr nocapture %dest) nounwind {
 ; X86-LABEL: crash:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    shlb $2, %dl
 ; X86-NEXT:    andb $3, %cl
 ; X86-NEXT:    orb %dl, %cl

diff --git a/llvm/test/CodeGen/X86/pr22473.ll b/llvm/test/CodeGen/X86/pr22473.ll
index 78f5ad8909640..25545c48b5cd2 100644
--- a/llvm/test/CodeGen/X86/pr22473.ll
+++ b/llvm/test/CodeGen/X86/pr22473.ll
@@ -6,7 +6,7 @@ define zeroext i1 @PR22473(ptr, i8) {
 ; X86-LABEL: PR22473:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    cmpb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/pr28824.ll b/llvm/test/CodeGen/X86/pr28824.ll
index 274689527a243..30c67ff98f4ee 100644
--- a/llvm/test/CodeGen/X86/pr28824.ll
+++ b/llvm/test/CodeGen/X86/pr28824.ll
@@ -1,33 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
 
 @d = global i32 0, align 4
 
 ; Verify the sar happens before ecx is clobbered with the parameter being
 ; passed to fn3
-
+; CHECK-LABEL: fn4
+; CHECK: movb d, %cl
+; CHECK: sarl %cl
+; CHECK: movl $2, %ecx
 define i32 @fn4(i32 %i) #0 {
-; CHECK-LABEL: fn4:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    subl $8, %esp
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movzbl d, %ecx
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    sarl %cl, %esi
-; CHECK-NEXT:    subl $8, %esp
-; CHECK-NEXT:    movl $2, %ecx
-; CHECK-NEXT:    movl $5, %edx
-; CHECK-NEXT:    pushl %eax
-; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    calll fn3 at PLT
-; CHECK-NEXT:    addl $16, %esp
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testl %esi, %esi
-; CHECK-NEXT:    setle %al
-; CHECK-NEXT:    addl $8, %esp
-; CHECK-NEXT:    popl %esi
-; CHECK-NEXT:    retl
 entry:
   %0 = load i32, ptr @d, align 4
   %shr = ashr i32 %i, %0

diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll
index 2745cb8bb908b..511672a6f2c5d 100644
--- a/llvm/test/CodeGen/X86/pr32345.ll
+++ b/llvm/test/CodeGen/X86/pr32345.ll
@@ -72,7 +72,7 @@ define void @foo() {
 ;
 ; X64-LABEL: foo:
 ; X64:       # %bb.0: # %bb
-; X64-NEXT:    movzbl var_27(%rip), %ecx
+; X64-NEXT:    movb var_27(%rip), %cl
 ; X64-NEXT:    movzwl var_22(%rip), %eax
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    addb $30, %cl
@@ -89,7 +89,7 @@ define void @foo() {
 ; 686-NEXT:    .cfi_def_cfa_register %ebp
 ; 686-NEXT:    andl $-8, %esp
 ; 686-NEXT:    subl $8, %esp
-; 686-NEXT:    movzbl var_27, %ecx
+; 686-NEXT:    movb var_27, %cl
 ; 686-NEXT:    movzwl var_22, %eax
 ; 686-NEXT:    movl %eax, (%esp)
 ; 686-NEXT:    movl $0, {{[0-9]+}}(%esp)

diff --git a/llvm/test/CodeGen/X86/pr34292.ll b/llvm/test/CodeGen/X86/pr34292.ll
index b9cc84c338efb..effc9a556a7cc 100644
--- a/llvm/test/CodeGen/X86/pr34292.ll
+++ b/llvm/test/CodeGen/X86/pr34292.ll
@@ -7,7 +7,7 @@
 define void @sum_unroll(ptr nocapture readonly, ptr nocapture) {
 ; CHECK-LABEL: sum_unroll:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl _ZL1c(%rip), %eax
+; CHECK-NEXT:    movb _ZL1c(%rip), %al
 ; CHECK-NEXT:    movq (%rdi), %rcx
 ; CHECK-NEXT:    addb $-1, %al
 ; CHECK-NEXT:    adcq %rcx, (%rsi)

diff --git a/llvm/test/CodeGen/X86/pr34381.ll b/llvm/test/CodeGen/X86/pr34381.ll
index 32d8b74c6667f..c2b25b43cdf92 100644
--- a/llvm/test/CodeGen/X86/pr34381.ll
+++ b/llvm/test/CodeGen/X86/pr34381.ll
@@ -17,7 +17,7 @@ define void @_Z3foov() {
 ; CHECK-NEXT:    cmpl %eax, var_21(%rip)
 ; CHECK-NEXT:    setb %cl
 ; CHECK-NEXT:    movl %ecx, var_390(%rip)
-; CHECK-NEXT:    movzbl var_11(%rip), %eax
+; CHECK-NEXT:    movb var_11(%rip), %al
 ; CHECK-NEXT:    movb %al, var_370(%rip)
 ; CHECK-NEXT:    retq
 entry:

diff --git a/llvm/test/CodeGen/X86/pr35765.ll b/llvm/test/CodeGen/X86/pr35765.ll
index 81d1fbe9d642b..5d15c2db3e629 100644
--- a/llvm/test/CodeGen/X86/pr35765.ll
+++ b/llvm/test/CodeGen/X86/pr35765.ll
@@ -9,7 +9,7 @@
 define dso_local void @PR35765() {
 ; CHECK-LABEL: PR35765:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzbl s1(%rip), %ecx
+; CHECK-NEXT:    movb s1(%rip), %cl
 ; CHECK-NEXT:    addb $-118, %cl
 ; CHECK-NEXT:    movl $4, %eax
 ; CHECK-NEXT:    shll %cl, %eax

diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll
index 8736d8e91e768..094767d2316b8 100644
--- a/llvm/test/CodeGen/X86/pr38539.ll
+++ b/llvm/test/CodeGen/X86/pr38539.ll
@@ -6,7 +6,7 @@
 define void @f() {
 ; X64-LABEL: f:
 ; X64:       # %bb.0: # %BB
-; X64-NEXT:    movzbl (%rax), %eax
+; X64-NEXT:    movb (%rax), %al
 ; X64-NEXT:    cmpb $0, (%rax)
 ; X64-NEXT:    setne (%rax)
 ; X64-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax
@@ -23,7 +23,7 @@ define void @f() {
 ; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    cmpb $0, (%eax)
 ; X86-NEXT:    setne (%eax)
 ; X86-NEXT:    leal -{{[0-9]+}}(%esp), %eax
@@ -61,7 +61,7 @@ BB:
 define void @g() {
 ; X64-LABEL: g:
 ; X64:       # %bb.0: # %BB
-; X64-NEXT:    movzbl (%rax), %eax
+; X64-NEXT:    movb (%rax), %al
 ; X64-NEXT:    cmpb $0, (%rax)
 ; X64-NEXT:    setne (%rax)
 ; X64-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax
@@ -78,7 +78,7 @@ define void @g() {
 ; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    cmpb $0, (%eax)
 ; X86-NEXT:    setne (%eax)
 ; X86-NEXT:    leal -{{[0-9]+}}(%esp), %eax

diff --git a/llvm/test/CodeGen/X86/pr38743.ll b/llvm/test/CodeGen/X86/pr38743.ll
index c05310090660d..fff34a8ec4f54 100644
--- a/llvm/test/CodeGen/X86/pr38743.ll
+++ b/llvm/test/CodeGen/X86/pr38743.ll
@@ -40,11 +40,11 @@ define void @pr38743(i32 %a0) #1 align 2 {
 ; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    movq %rax, (%rax)
-; CHECK-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    movzwl -{{[0-9]+}}(%rsp), %edx
 ; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %esi
-; CHECK-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %dil
 ; CHECK-NEXT:    movb %al, (%rax)
 ; CHECK-NEXT:    movq %rcx, 1(%rax)
 ; CHECK-NEXT:    movw %dx, 9(%rax)

diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll
index b421cc471fbfc..37f0cf886416a 100644
--- a/llvm/test/CodeGen/X86/pr38795.ll
+++ b/llvm/test/CodeGen/X86/pr38795.ll
@@ -51,7 +51,7 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    cltd
 ; CHECK-NEXT:    idivl a
-; CHECK-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
 ; CHECK-NEXT:    movb %cl, %dh
 ; CHECK-NEXT:    movl $0, h
 ; CHECK-NEXT:    cmpb $8, %dl
@@ -76,7 +76,7 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movl $.str, (%esp)
 ; CHECK-NEXT:    calll printf
-; CHECK-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
 ; CHECK-NEXT:    # implicit-def: $eax
 ; CHECK-NEXT:  .LBB0_6: # %for.cond35
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1

diff --git a/llvm/test/CodeGen/X86/pr39926.ll b/llvm/test/CodeGen/X86/pr39926.ll
index 439ee5784416c..edaacafd3f4cf 100644
--- a/llvm/test/CodeGen/X86/pr39926.ll
+++ b/llvm/test/CodeGen/X86/pr39926.ll
@@ -10,17 +10,17 @@ define i8 @test_offset(ptr %base) {
 ; CHECK-NEXT:    movl $0, 1(%rdi)
 ; CHECK-NEXT:    movl -4(%rdi), %eax
 ; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movb (%rdi), %al
 ; CHECK-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl 1(%rdi), %eax
 ; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movzwl 5(%rdi), %eax
 ; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movzbl 7(%rdi), %eax
+; CHECK-NEXT:    movb 7(%rdi), %al
 ; CHECK-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl 8(%rdi), %eax
 ; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT:    movb -{{[0-9]+}}(%rsp), %al
 ; CHECK-NEXT:    popq %rcx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/pr46527.ll b/llvm/test/CodeGen/X86/pr46527.ll
index 5ae953ab82ab4..82701f5d1b4cf 100644
--- a/llvm/test/CodeGen/X86/pr46527.ll
+++ b/llvm/test/CodeGen/X86/pr46527.ll
@@ -12,7 +12,7 @@ define void @f(ptr %out, <16 x i8> %in, i1 %flag) {
 ; CHECK-NEXT:  .Ltmp0:
 ; CHECK-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ecx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; CHECK-NEXT:    notb %dl
 ; CHECK-NEXT:    andb $1, %dl
 ; CHECK-NEXT:    movzbl %dl, %edx

diff --git a/llvm/test/CodeGen/X86/pr5145.ll b/llvm/test/CodeGen/X86/pr5145.ll
index da15bd6720ce2..6990e784e9562 100644
--- a/llvm/test/CodeGen/X86/pr5145.ll
+++ b/llvm/test/CodeGen/X86/pr5145.ll
@@ -5,7 +5,7 @@
 define void @atomic_maxmin_i8() {
 ; CHECK-LABEL: atomic_maxmin_i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl sc8(%rip), %eax
+; CHECK-NEXT:    movb sc8(%rip), %al
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_1: # %atomicrmw.start
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -17,7 +17,7 @@ define void @atomic_maxmin_i8() {
 ; CHECK-NEXT:    lock cmpxchgb %cl, sc8(%rip)
 ; CHECK-NEXT:    jne .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    movzbl sc8(%rip), %eax
+; CHECK-NEXT:    movb sc8(%rip), %al
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_3: # %atomicrmw.start2
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -29,7 +29,7 @@ define void @atomic_maxmin_i8() {
 ; CHECK-NEXT:    lock cmpxchgb %cl, sc8(%rip)
 ; CHECK-NEXT:    jne .LBB0_3
 ; CHECK-NEXT:  # %bb.4: # %atomicrmw.end1
-; CHECK-NEXT:    movzbl sc8(%rip), %eax
+; CHECK-NEXT:    movb sc8(%rip), %al
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_5: # %atomicrmw.start8
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -41,7 +41,7 @@ define void @atomic_maxmin_i8() {
 ; CHECK-NEXT:    lock cmpxchgb %cl, sc8(%rip)
 ; CHECK-NEXT:    jne .LBB0_5
 ; CHECK-NEXT:  # %bb.6: # %atomicrmw.end7
-; CHECK-NEXT:    movzbl sc8(%rip), %eax
+; CHECK-NEXT:    movb sc8(%rip), %al
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_7: # %atomicrmw.start14
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1

diff --git a/llvm/test/CodeGen/X86/reduce-trunc-shl.ll b/llvm/test/CodeGen/X86/reduce-trunc-shl.ll
index 54ba5bb1eae49..891d42a26feb9 100644
--- a/llvm/test/CodeGen/X86/reduce-trunc-shl.ll
+++ b/llvm/test/CodeGen/X86/reduce-trunc-shl.ll
@@ -157,14 +157,14 @@ define void @trunc_shl_16_i16_i64(ptr %out, ptr %in) {
 define void @trunc_shl_7_i8_i64(ptr %out, ptr %in) {
 ; SSE2-LABEL: trunc_shl_7_i8_i64:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movzbl (%rsi), %eax
+; SSE2-NEXT:    movb (%rsi), %al
 ; SSE2-NEXT:    shlb $7, %al
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: trunc_shl_7_i8_i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    movzbl (%rsi), %eax
+; AVX2-NEXT:    movb (%rsi), %al
 ; AVX2-NEXT:    shlb $7, %al
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/rot16.ll b/llvm/test/CodeGen/X86/rot16.ll
index c7c2d33d98922..c101a7d5977e6 100644
--- a/llvm/test/CodeGen/X86/rot16.ll
+++ b/llvm/test/CodeGen/X86/rot16.ll
@@ -7,7 +7,7 @@
 define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86-LABEL: foo:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw %cl, %ax
 ; X86-NEXT:    retl
@@ -32,7 +32,7 @@ define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    shldw %cl, %dx, %ax
 ; X86-NEXT:    retl
@@ -56,7 +56,7 @@ define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind {
 define i16 @un(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86-LABEL: un:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorw %cl, %ax
 ; X86-NEXT:    retl
@@ -81,7 +81,7 @@ define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    shrdw %cl, %dx, %ax
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/rot32.ll b/llvm/test/CodeGen/X86/rot32.ll
index d8edd31c3c06a..aa2cabb43d0c5 100644
--- a/llvm/test/CodeGen/X86/rot32.ll
+++ b/llvm/test/CodeGen/X86/rot32.ll
@@ -9,7 +9,7 @@
 define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone {
 ; CHECK32-LABEL: foo:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK32-NEXT:    roll %cl, %eax
 ; CHECK32-NEXT:    retl
@@ -32,7 +32,7 @@ entry:
 define i32 @bar(i32 %x, i32 %y, i32 %z) nounwind readnone {
 ; CHECK32-LABEL: bar:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK32-NEXT:    shldl %cl, %edx, %eax
@@ -56,7 +56,7 @@ entry:
 define i32 @un(i32 %x, i32 %y, i32 %z) nounwind readnone {
 ; CHECK32-LABEL: un:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK32-NEXT:    rorl %cl, %eax
 ; CHECK32-NEXT:    retl
@@ -79,7 +79,7 @@ entry:
 define i32 @bu(i32 %x, i32 %y, i32 %z) nounwind readnone {
 ; CHECK32-LABEL: bu:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK32-NEXT:    shrdl %cl, %edx, %eax

diff --git a/llvm/test/CodeGen/X86/rotate.ll b/llvm/test/CodeGen/X86/rotate.ll
index ea32edba62822..4e61a21aaac7b 100644
--- a/llvm/test/CodeGen/X86/rotate.ll
+++ b/llvm/test/CodeGen/X86/rotate.ll
@@ -8,7 +8,7 @@ define i64 @rotl64(i64 %A, i8 %Amt) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %esi, %eax
@@ -62,7 +62,7 @@ define i64 @rotr64(i64 %A, i8 %Amt) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl %esi, %edx
@@ -197,7 +197,7 @@ define i64 @rotr1_64(i64 %A) nounwind {
 define i32 @rotl32(i32 %A, i8 %Amt) nounwind {
 ; X86-LABEL: rotl32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    roll %cl, %eax
 ; X86-NEXT:    retl
@@ -221,7 +221,7 @@ define i32 @rotl32(i32 %A, i8 %Amt) nounwind {
 define i32 @rotr32(i32 %A, i8 %Amt) nounwind {
 ; X86-LABEL: rotr32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorl %cl, %eax
 ; X86-NEXT:    retl
@@ -317,7 +317,7 @@ define i32 @rotr1_32(i32 %A) nounwind {
 define i16 @rotl16(i16 %A, i8 %Amt) nounwind {
 ; X86-LABEL: rotl16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw %cl, %ax
 ; X86-NEXT:    retl
@@ -342,7 +342,7 @@ define i16 @rotl16(i16 %A, i8 %Amt) nounwind {
 define i16 @rotr16(i16 %A, i8 %Amt) nounwind {
 ; X86-LABEL: rotr16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorw %cl, %ax
 ; X86-NEXT:    retl
@@ -443,8 +443,8 @@ define i16 @rotr1_16(i16 %A) nounwind {
 define i8 @rotl8(i8 %A, i8 %Amt) nounwind {
 ; X86-LABEL: rotl8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rolb %cl, %al
 ; X86-NEXT:    retl
 ;
@@ -466,8 +466,8 @@ define i8 @rotl8(i8 %A, i8 %Amt) nounwind {
 define i8 @rotr8(i8 %A, i8 %Amt) nounwind {
 ; X86-LABEL: rotr8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rorb %cl, %al
 ; X86-NEXT:    retl
 ;
@@ -489,7 +489,7 @@ define i8 @rotr8(i8 %A, i8 %Amt) nounwind {
 define i8 @rotli8(i8 %A) nounwind {
 ; X86-LABEL: rotli8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rolb $5, %al
 ; X86-NEXT:    retl
 ;
@@ -508,7 +508,7 @@ define i8 @rotli8(i8 %A) nounwind {
 define i8 @rotri8(i8 %A) nounwind {
 ; X86-LABEL: rotri8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rolb $3, %al
 ; X86-NEXT:    retl
 ;
@@ -527,7 +527,7 @@ define i8 @rotri8(i8 %A) nounwind {
 define i8 @rotl1_8(i8 %A) nounwind {
 ; X86-LABEL: rotl1_8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rolb %al
 ; X86-NEXT:    retl
 ;
@@ -546,7 +546,7 @@ define i8 @rotl1_8(i8 %A) nounwind {
 define i8 @rotr1_8(i8 %A) nounwind {
 ; X86-LABEL: rotr1_8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rorb %al
 ; X86-NEXT:    retl
 ;
@@ -653,7 +653,7 @@ define i64 @truncated_rot(i64 %x, i32 %amt) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    movl %esi, %eax

diff --git a/llvm/test/CodeGen/X86/rotate4.ll b/llvm/test/CodeGen/X86/rotate4.ll
index 0cc9f465dd75a..fff0f1da693d1 100644
--- a/llvm/test/CodeGen/X86/rotate4.ll
+++ b/llvm/test/CodeGen/X86/rotate4.ll
@@ -8,7 +8,7 @@
 define i32 @rotate_left_32(i32 %a, i32 %b) {
 ; X86-LABEL: rotate_left_32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    roll %cl, %eax
 ; X86-NEXT:    retl
@@ -32,7 +32,7 @@ define i32 @rotate_left_32(i32 %a, i32 %b) {
 define i32 @rotate_right_32(i32 %a, i32 %b) {
 ; X86-LABEL: rotate_right_32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorl %cl, %eax
 ; X86-NEXT:    retl
@@ -65,7 +65,7 @@ define i64 @rotate_left_64(i64 %a, i64 %b) {
 ; X86-NEXT:    .cfi_offset %esi, -16
 ; X86-NEXT:    .cfi_offset %edi, -12
 ; X86-NEXT:    .cfi_offset %ebx, -8
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %esi, %eax
@@ -126,7 +126,7 @@ define i64 @rotate_right_64(i64 %a, i64 %b) {
 ; X86-NEXT:    .cfi_offset %esi, -16
 ; X86-NEXT:    .cfi_offset %edi, -12
 ; X86-NEXT:    .cfi_offset %ebx, -8
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl %esi, %edx
@@ -180,7 +180,7 @@ define i64 @rotate_right_64(i64 %a, i64 %b) {
 define void @rotate_left_m32(ptr%pa, i32 %b) {
 ; X86-LABEL: rotate_left_m32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    roll %cl, (%eax)
 ; X86-NEXT:    retl
@@ -205,7 +205,7 @@ define void @rotate_left_m32(ptr%pa, i32 %b) {
 define void @rotate_right_m32(ptr%pa, i32 %b) {
 ; X86-LABEL: rotate_right_m32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorl %cl, (%eax)
 ; X86-NEXT:    retl
@@ -242,7 +242,7 @@ define void @rotate_left_m64(ptr%pa, i64 %b) {
 ; X86-NEXT:    .cfi_offset %edi, -16
 ; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    .cfi_offset %ebp, -8
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %esi
 ; X86-NEXT:    movl 4(%eax), %ebx
@@ -312,7 +312,7 @@ define void @rotate_right_m64(ptr%pa, i64 %b) {
 ; X86-NEXT:    .cfi_offset %edi, -16
 ; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    .cfi_offset %ebp, -8
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %ebx
 ; X86-NEXT:    movl 4(%eax), %esi
@@ -373,8 +373,8 @@ define void @rotate_right_m64(ptr%pa, i64 %b) {
 define i8 @rotate_left_8(i8 %x, i32 %amount) {
 ; X86-LABEL: rotate_left_8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rolb %cl, %al
 ; X86-NEXT:    retl
 ;
@@ -399,8 +399,8 @@ define i8 @rotate_left_8(i8 %x, i32 %amount) {
 define i8 @rotate_right_8(i8 %x, i32 %amount) {
 ; X86-LABEL: rotate_right_8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    rorb %cl, %al
 ; X86-NEXT:    retl
 ;
@@ -425,7 +425,7 @@ define i8 @rotate_right_8(i8 %x, i32 %amount) {
 define i16 @rotate_left_16(i16 %x, i32 %amount) {
 ; X86-LABEL: rotate_left_16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw %cl, %ax
 ; X86-NEXT:    retl
@@ -451,7 +451,7 @@ define i16 @rotate_left_16(i16 %x, i32 %amount) {
 define i16 @rotate_right_16(i16 %x, i32 %amount) {
 ; X86-LABEL: rotate_right_16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorw %cl, %ax
 ; X86-NEXT:    retl
@@ -477,7 +477,7 @@ define i16 @rotate_right_16(i16 %x, i32 %amount) {
 define void @rotate_left_m8(ptr %p, i32 %amount) {
 ; X86-LABEL: rotate_left_m8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolb %cl, (%eax)
 ; X86-NEXT:    retl
@@ -503,7 +503,7 @@ define void @rotate_left_m8(ptr %p, i32 %amount) {
 define void @rotate_right_m8(ptr %p, i32 %amount) {
 ; X86-LABEL: rotate_right_m8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorb %cl, (%eax)
 ; X86-NEXT:    retl
@@ -529,7 +529,7 @@ define void @rotate_right_m8(ptr %p, i32 %amount) {
 define void @rotate_left_m16(ptr %p, i32 %amount) {
 ; X86-LABEL: rotate_left_m16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw %cl, (%eax)
 ; X86-NEXT:    retl
@@ -555,7 +555,7 @@ define void @rotate_left_m16(ptr %p, i32 %amount) {
 define void @rotate_right_m16(ptr %p, i32 %amount) {
 ; X86-LABEL: rotate_right_m16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorw %cl, (%eax)
 ; X86-NEXT:    retl
@@ -582,7 +582,7 @@ define i32 @rotate_demanded_bits(i32, i32) {
 ; X86-LABEL: rotate_demanded_bits:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $30, %cl
 ; X86-NEXT:    roll %cl, %eax
 ; X86-NEXT:    retl
@@ -608,7 +608,7 @@ define i32 @rotate_demanded_bits_2(i32, i32) {
 ; X86-LABEL: rotate_demanded_bits_2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $23, %cl
 ; X86-NEXT:    roll %cl, %eax
 ; X86-NEXT:    retl
@@ -633,7 +633,7 @@ define i32 @rotate_demanded_bits_2(i32, i32) {
 define i32 @rotate_demanded_bits_3(i32, i32) {
 ; X86-LABEL: rotate_demanded_bits_3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    addb %cl, %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    roll %cl, %eax

diff --git a/llvm/test/CodeGen/X86/sadd_sat.ll b/llvm/test/CodeGen/X86/sadd_sat.ll
index 5b9a42d1f0d91..30ccd08b52cf0 100644
--- a/llvm/test/CodeGen/X86/sadd_sat.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat.ll
@@ -101,8 +101,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
 define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    addb %cl, %dl
 ; X86-NEXT:    sarb $7, %dl
@@ -134,7 +134,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
 define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movzbl %al, %ecx
 ; X86-NEXT:    cmpb $7, %al

diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
index deabeb27cdab8..abb928421afa4 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
@@ -107,9 +107,9 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
 define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    addb %cl, %dl
 ; X86-NEXT:    sarb $7, %dl
@@ -145,7 +145,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    sarb $4, %al

diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index 11d86dd72c561..f9af360bf30bb 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -429,8 +429,8 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
 define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; SSE-LABEL: v1i8:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %eax
-; SSE-NEXT:    movzbl (%rsi), %ecx
+; SSE-NEXT:    movb (%rdi), %al
+; SSE-NEXT:    movb (%rsi), %cl
 ; SSE-NEXT:    leal (%rax,%rcx), %esi
 ; SSE-NEXT:    sarb $7, %sil
 ; SSE-NEXT:    addb $-128, %sil
@@ -443,8 +443,8 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ;
 ; AVX-LABEL: v1i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %eax
-; AVX-NEXT:    movzbl (%rsi), %ecx
+; AVX-NEXT:    movb (%rdi), %al
+; AVX-NEXT:    movb (%rsi), %cl
 ; AVX-NEXT:    leal (%rax,%rcx), %esi
 ; AVX-NEXT:    sarb $7, %sil
 ; AVX-NEXT:    addb $-128, %sil

diff --git a/llvm/test/CodeGen/X86/sdiv_fix.ll b/llvm/test/CodeGen/X86/sdiv_fix.ll
index 5b4d180140957..37d47ebc228ba 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix.ll
@@ -224,10 +224,10 @@ define i4 @func4(i4 %x, i4 %y) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    sarb $4, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    shlb $4, %dl
 ; X86-NEXT:    sarb $4, %dl
 ; X86-NEXT:    shlb $2, %dl

diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 9e890e6662509..fa89da14508c6 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -264,10 +264,10 @@ define i4 @func4(i4 %x, i4 %y) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    sarb $4, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    shlb $4, %dl
 ; X86-NEXT:    sarb $4, %dl
 ; X86-NEXT:    shlb $2, %dl

diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index bad7ef45dda58..2adcbd065cf7c 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -207,7 +207,7 @@ define signext i8 @test4(ptr nocapture %P, double %F) nounwind readonly {
 ; MCU-NEXT:    # kill: def $ah killed $ah killed $ax
 ; MCU-NEXT:    sahf
 ; MCU-NEXT:    seta %dl
-; MCU-NEXT:    movzbl (%ecx,%edx,4), %eax
+; MCU-NEXT:    movb (%ecx,%edx,4), %al
 ; MCU-NEXT:    retl
 entry:
   %0 = fcmp olt double %F, 4.200000e+01
@@ -1235,7 +1235,7 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
 ; ATHLON-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; ATHLON-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; ATHLON-NEXT:    cmovll %eax, %ecx
-; ATHLON-NEXT:    movzbl (%ecx), %eax
+; ATHLON-NEXT:    movb (%ecx), %al
 ; ATHLON-NEXT:    retl
 ;
 ; MCU-LABEL: test18:
@@ -1276,7 +1276,7 @@ define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) {
 ; ATHLON-LABEL: trunc_select_miscompile:
 ; ATHLON:       ## %bb.0:
 ; ATHLON-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; ATHLON-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; ATHLON-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; ATHLON-NEXT:    orb $2, %cl
 ; ATHLON-NEXT:    shll %cl, %eax
 ; ATHLON-NEXT:    retl
@@ -1773,7 +1773,7 @@ define i8 @select_uaddo_common_op0(i8 %a, i8 %b, i8 %c, i1 %cond) {
 ;
 ; ATHLON-LABEL: select_uaddo_common_op0:
 ; ATHLON:       ## %bb.0:
-; ATHLON-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; ATHLON-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; ATHLON-NEXT:    testb $1, {{[0-9]+}}(%esp)
 ; ATHLON-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; ATHLON-NEXT:    leal {{[0-9]+}}(%esp), %edx

diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll
index d225d420987b3..39c54af306d66 100644
--- a/llvm/test/CodeGen/X86/setcc-combine.ll
+++ b/llvm/test/CodeGen/X86/setcc-combine.ll
@@ -243,7 +243,7 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
 define void @test_i1_uge(ptr%A2) {
 ; CHECK-LABEL: test_i1_uge:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    movb (%rdi), %al
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    xorb $1, %cl
 ; CHECK-NEXT:    andb %cl, %al

diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll
index 62380bcec1cab..ec23cc9155ac4 100644
--- a/llvm/test/CodeGen/X86/setcc.ll
+++ b/llvm/test/CodeGen/X86/setcc.ll
@@ -280,7 +280,7 @@ define i32 @t12(i32 %0, i32 %1) {
 define i16 @shift_and(i16 %a) {
 ; X86-LABEL: shift_and:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $4, %al
 ; X86-NEXT:    shrb $2, %al
 ; X86-NEXT:    movzbl %al, %eax

diff --git a/llvm/test/CodeGen/X86/sext-trunc.ll b/llvm/test/CodeGen/X86/sext-trunc.ll
index c293dcd8efe9c..5c59bc00860e3 100644
--- a/llvm/test/CodeGen/X86/sext-trunc.ll
+++ b/llvm/test/CodeGen/X86/sext-trunc.ll
@@ -1,12 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
 
 define signext i8 @foo(i16 signext  %x) nounwind  {
-; CHECK-LABEL: foo:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    retl
 	%retval56 = trunc i16 %x to i8
 	ret i8 %retval56
 
+; CHECK-LABEL: foo:
+; CHECK: movb
+; CHECK-NEXT: retl
 }

diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index c89db15d12f45..1b0f6f0c09c79 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1322,7 +1322,7 @@ define i64 @reg64_lshr_by_masked_negated_unfolded(i64 %val, i64 %shamt) nounwind
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X32-NEXT:    subb %dl, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx

diff --git a/llvm/test/CodeGen/X86/shift-and.ll b/llvm/test/CodeGen/X86/shift-and.ll
index 3cb680396b6ba..ed010275f0820 100644
--- a/llvm/test/CodeGen/X86/shift-and.ll
+++ b/llvm/test/CodeGen/X86/shift-and.ll
@@ -5,7 +5,7 @@
 define i32 @t1(i32 %t, i32 %val) nounwind {
 ; X32-LABEL: t1:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    shll %cl, %eax
 ; X32-NEXT:    retl
@@ -25,7 +25,7 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
 define i32 @t2(i32 %t, i32 %val) nounwind {
 ; X32-LABEL: t2:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    shll %cl, %eax
 ; X32-NEXT:    retl
@@ -47,7 +47,7 @@ define i32 @t2(i32 %t, i32 %val) nounwind {
 define void @t3(i16 %t) nounwind {
 ; X32-LABEL: t3:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    sarw %cl, X
 ; X32-NEXT:    retl
 ;
@@ -68,7 +68,7 @@ define i64 @t4(i64 %t, i64 %val) nounwind {
 ; X32-LABEL: t4:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl %esi, %edx
@@ -99,7 +99,7 @@ define i64 @t5(i64 %t, i64 %val) nounwind {
 ; X32-LABEL: t5:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl %esi, %edx
@@ -131,7 +131,7 @@ define void @t5ptr(i64 %t, ptr %ptr) nounwind {
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl (%eax), %edx
 ; X32-NEXT:    movl 4(%eax), %edi

diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll
index db00e1c49dca7..d95b9277ddb5b 100644
--- a/llvm/test/CodeGen/X86/shift-bmi2.ll
+++ b/llvm/test/CodeGen/X86/shift-bmi2.ll
@@ -5,7 +5,7 @@
 define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
 ; BMI2-LABEL: shl32:
 ; BMI2:       # %bb.0:
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -37,7 +37,7 @@ define i32 @shl32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
 ; BMI2-LABEL: shl32p:
 ; BMI2:       # %bb.0:
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    shlxl %ecx, (%eax), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -74,7 +74,7 @@ define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
 ; BMI2-NEXT:    pushl %esi
 ; BMI2-NEXT:    .cfi_def_cfa_offset 8
 ; BMI2-NEXT:    .cfi_offset %esi, -8
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -119,7 +119,7 @@ define i64 @shl64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
 ; BMI2-NEXT:    pushl %esi
 ; BMI2-NEXT:    .cfi_def_cfa_offset 8
 ; BMI2-NEXT:    .cfi_offset %esi, -8
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    movl (%eax), %esi
 ; BMI2-NEXT:    movl 4(%eax), %edx
@@ -165,7 +165,7 @@ define i64 @shl64pi(ptr %p) nounwind uwtable readnone {
 define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
 ; BMI2-LABEL: lshr32:
 ; BMI2:       # %bb.0:
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -181,7 +181,7 @@ define i32 @lshr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
 ; BMI2-LABEL: lshr32p:
 ; BMI2:       # %bb.0:
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    shrxl %ecx, (%eax), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -200,7 +200,7 @@ define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
 ; BMI2-NEXT:    pushl %esi
 ; BMI2-NEXT:    .cfi_def_cfa_offset 8
 ; BMI2-NEXT:    .cfi_offset %esi, -8
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -227,7 +227,7 @@ define i64 @lshr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
 ; BMI2-NEXT:    pushl %esi
 ; BMI2-NEXT:    .cfi_def_cfa_offset 8
 ; BMI2-NEXT:    .cfi_offset %esi, -8
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; BMI2-NEXT:    movl (%edx), %eax
 ; BMI2-NEXT:    movl 4(%edx), %edx
@@ -253,7 +253,7 @@ define i64 @lshr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
 define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
 ; BMI2-LABEL: ashr32:
 ; BMI2:       # %bb.0:
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; BMI2-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -269,7 +269,7 @@ define i32 @ashr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
 ; BMI2-LABEL: ashr32p:
 ; BMI2:       # %bb.0:
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    sarxl %ecx, (%eax), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -288,7 +288,7 @@ define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
 ; BMI2-NEXT:    pushl %esi
 ; BMI2-NEXT:    .cfi_def_cfa_offset 8
 ; BMI2-NEXT:    .cfi_offset %esi, -8
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -315,7 +315,7 @@ define i64 @ashr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
 ; BMI2-NEXT:    pushl %esi
 ; BMI2-NEXT:    .cfi_def_cfa_offset 8
 ; BMI2-NEXT:    .cfi_offset %esi, -8
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; BMI2-NEXT:    movl (%edx), %eax
 ; BMI2-NEXT:    movl 4(%edx), %edx
@@ -341,7 +341,7 @@ define i64 @ashr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
 define i32 @shl32and(i32 %t, i32 %val) nounwind {
 ; BMI2-LABEL: shl32and:
 ; BMI2:       # %bb.0:
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -358,7 +358,7 @@ define i64 @shl64and(i64 %t, i64 %val) nounwind {
 ; BMI2-LABEL: shl64and:
 ; BMI2:       # %bb.0:
 ; BMI2-NEXT:    pushl %esi
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; BMI2-NEXT:    shldl %cl, %eax, %edx
@@ -382,7 +382,7 @@ define i64 @shl64and(i64 %t, i64 %val) nounwind {
 define i32 @lshr32and(i32 %t, i32 %val) nounwind {
 ; BMI2-LABEL: lshr32and:
 ; BMI2:       # %bb.0:
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -399,7 +399,7 @@ define i64 @lshr64and(i64 %t, i64 %val) nounwind {
 ; BMI2-LABEL: lshr64and:
 ; BMI2:       # %bb.0:
 ; BMI2-NEXT:    pushl %esi
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; BMI2-NEXT:    shrdl %cl, %edx, %eax
@@ -423,7 +423,7 @@ define i64 @lshr64and(i64 %t, i64 %val) nounwind {
 define i32 @ashr32and(i32 %t, i32 %val) nounwind {
 ; BMI2-LABEL: ashr32and:
 ; BMI2:       # %bb.0:
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; BMI2-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    retl
 ;
@@ -440,7 +440,7 @@ define i64 @ashr64and(i64 %t, i64 %val) nounwind {
 ; BMI2-LABEL: ashr64and:
 ; BMI2:       # %bb.0:
 ; BMI2-NEXT:    pushl %esi
-; BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; BMI2-NEXT:    shrdl %cl, %edx, %eax

diff --git a/llvm/test/CodeGen/X86/shift-by-signext.ll b/llvm/test/CodeGen/X86/shift-by-signext.ll
index 97a4318c0720f..6ecf8cd9c882a 100644
--- a/llvm/test/CodeGen/X86/shift-by-signext.ll
+++ b/llvm/test/CodeGen/X86/shift-by-signext.ll
@@ -8,7 +8,7 @@
 define i32 @t0_shl(i32 %x, i8 %shamt) nounwind {
 ; X86-LABEL: t0_shl:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
@@ -23,7 +23,7 @@ define i32 @t0_shl(i32 %x, i8 %shamt) nounwind {
 define i32 @t1_lshr(i32 %x, i8 %shamt) nounwind {
 ; X86-LABEL: t1_lshr:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
@@ -38,7 +38,7 @@ define i32 @t1_lshr(i32 %x, i8 %shamt) nounwind {
 define i32 @t2_ashr(i32 %x, i8 %shamt) nounwind {
 ; X86-LABEL: t2_ashr:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
@@ -90,7 +90,7 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shldl %cl, %edx, %eax
 ; X86-NEXT:    retl
 ;
@@ -110,7 +110,7 @@ define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shrdl %cl, %edx, %eax
 ; X86-NEXT:    retl
 ;

diff --git a/llvm/test/CodeGen/X86/shift-coalesce.ll b/llvm/test/CodeGen/X86/shift-coalesce.ll
index e01f56d643df0..3c71feb6483f4 100644
--- a/llvm/test/CodeGen/X86/shift-coalesce.ll
+++ b/llvm/test/CodeGen/X86/shift-coalesce.ll
@@ -12,7 +12,7 @@ define i64 @foo(i64 %x, ptr %X) {
 ; CHECK-NEXT:    mov esi, dword ptr [esp + 8]
 ; CHECK-NEXT:    mov edx, dword ptr [esp + 12]
 ; CHECK-NEXT:    mov eax, dword ptr [esp + 16]
-; CHECK-NEXT:    movzx ecx, byte ptr [eax]
+; CHECK-NEXT:    mov cl, byte ptr [eax]
 ; CHECK-NEXT:    mov eax, esi
 ; CHECK-NEXT:    shl eax, cl
 ; CHECK-NEXT:    shld edx, esi, cl

diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index 8f51cfd26240e..075fbb29f4a95 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -180,7 +180,7 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
 define i64 @ashr_add_shl_i8(i64 %r) nounwind {
 ; X32-LABEL: ashr_add_shl_i8:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    addb $2, %al
 ; X32-NEXT:    movsbl %al, %eax
 ; X32-NEXT:    movl %eax, %edx
@@ -204,8 +204,8 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind {
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %dh
 ; X32-NEXT:    incb %dh

diff --git a/llvm/test/CodeGen/X86/shift-double.ll b/llvm/test/CodeGen/X86/shift-double.ll
index 5a2028216033c..1213a80921d27 100644
--- a/llvm/test/CodeGen/X86/shift-double.ll
+++ b/llvm/test/CodeGen/X86/shift-double.ll
@@ -8,7 +8,7 @@ define i64 @test1(i64 %X, i8 %C) nounwind {
 ; X86-LABEL: test1:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %esi, %eax
@@ -39,7 +39,7 @@ define i64 @test2(i64 %X, i8 %C) nounwind {
 ; X86-LABEL: test2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl %esi, %edx
@@ -71,7 +71,7 @@ define i64 @test3(i64 %X, i8 %C) nounwind {
 ; X86-LABEL: test3:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl %esi, %edx
@@ -103,7 +103,7 @@ define i64 @test3(i64 %X, i8 %C) nounwind {
 define i32 @test4(i32 %A, i32 %B, i8 %C) nounwind {
 ; X86-LABEL: test4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shldl %cl, %edx, %eax
@@ -130,7 +130,7 @@ define i16 @test5(i16 %A, i16 %B, i8 %C) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    shldw %cl, %dx, %ax
 ; X86-NEXT:    retl
@@ -158,7 +158,7 @@ define i16 @test5(i16 %A, i16 %B, i8 %C) nounwind {
 define i32 @test6(i32 %A, i32 %B, i8 %C) nounwind {
 ; X86-LABEL: test6:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrdl %cl, %edx, %eax
@@ -185,7 +185,7 @@ define i16 @test7(i16 %A, i16 %B, i8 %C) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    shrdw %cl, %dx, %ax
 ; X86-NEXT:    retl
@@ -214,7 +214,7 @@ define i64 @test8(i64 %val, i32 %bits) nounwind {
 ; X86-LABEL: test8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %esi, %eax
@@ -240,7 +240,7 @@ define i64 @test8(i64 %val, i32 %bits) nounwind {
 define i64 @test9(i64 %val, i32 %bits) nounwind {
 ; X86-LABEL: test9:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shrdl %cl, %edx, %eax
@@ -264,7 +264,7 @@ define i64 @test9(i64 %val, i32 %bits) nounwind {
 define i64 @test10(i64 %val, i32 %bits) nounwind {
 ; X86-LABEL: test10:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shrdl %cl, %edx, %eax
@@ -290,7 +290,7 @@ define i64 @test10(i64 %val, i32 %bits) nounwind {
 define i32 @test11(i32 %hi, i32 %lo, i32 %bits) nounwind {
 ; X86-LABEL: test11:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shldl %cl, %edx, %eax
@@ -314,7 +314,7 @@ define i32 @test11(i32 %hi, i32 %lo, i32 %bits) nounwind {
 define i32 @test12(i32 %hi, i32 %lo, i32 %bits) nounwind {
 ; X86-LABEL: test12:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrdl %cl, %edx, %eax
@@ -338,7 +338,7 @@ define i32 @test12(i32 %hi, i32 %lo, i32 %bits) nounwind {
 define i32 @test13(i32 %hi, i32 %lo, i32 %bits) nounwind {
 ; X86-LABEL: test13:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shldl %cl, %edx, %eax
@@ -361,7 +361,7 @@ define i32 @test13(i32 %hi, i32 %lo, i32 %bits) nounwind {
 define i32 @test14(i32 %hi, i32 %lo, i32 %bits) nounwind {
 ; X86-LABEL: test14:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrdl %cl, %edx, %eax
@@ -384,7 +384,7 @@ define i32 @test14(i32 %hi, i32 %lo, i32 %bits) nounwind {
 define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind {
 ; X86-LABEL: test15:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shldl %cl, %edx, %eax
@@ -408,7 +408,7 @@ define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind {
 define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind {
 ; X86-LABEL: test16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrdl %cl, %edx, %eax
@@ -432,7 +432,7 @@ define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind {
 define i32 @test17(i32 %hi, i32 %lo, i32 %bits) nounwind {
 ; X86-LABEL: test17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shrdl %cl, %edx, %eax
@@ -456,7 +456,7 @@ define i32 @test17(i32 %hi, i32 %lo, i32 %bits) nounwind {
 define i32 @test18(i32 %hi, i32 %lo, i32 %bits) nounwind {
 ; X86-LABEL: test18:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shldl %cl, %edx, %eax
@@ -488,7 +488,7 @@ define i32 @not_shld_i32(i32, i32, i32) {
 ; X86-LABEL: not_shld_i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shll %cl, %edx
 ; X86-NEXT:    negb %cl
@@ -519,7 +519,7 @@ define i32 @not_shrd_i32(i32, i32, i32) {
 ; X86-LABEL: not_shrd_i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shrl %cl, %edx
 ; X86-NEXT:    negb %cl

diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index d85f4f520bd25..ae337fcd6a02c 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -15,7 +15,7 @@ define void @test_lshr_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
 ; i686-NEXT:    pushl %esi
 ; i686-NEXT:    subl $20, %esp
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; i686-NEXT:    movl %ebp, %esi
@@ -152,7 +152,7 @@ define void @test_ashr_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
 ; i686-NEXT:    pushl %esi
 ; i686-NEXT:    subl $24, %esp
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; i686-NEXT:    movl %ebp, %esi
@@ -295,7 +295,7 @@ define void @test_shl_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
 ; i686-NEXT:    subl $20, %esp
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; i686-NEXT:    movl %eax, %ecx
 ; i686-NEXT:    shll %cl, %ebx
@@ -722,7 +722,7 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
 ; x86_64:       # %bb.0: # %entry
 ; x86_64-NEXT:    movq %rcx, %rax
 ; x86_64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
-; x86_64-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
+; x86_64-NEXT:    movb {{[0-9]+}}(%rsp), %r9b
 ; x86_64-NEXT:    movl %r9d, %ecx
 ; x86_64-NEXT:    shrdq %cl, %rax, %rdx
 ; x86_64-NEXT:    movl %r8d, %ecx
@@ -1016,7 +1016,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
 ; x86_64:       # %bb.0: # %entry
 ; x86_64-NEXT:    movq %rcx, %r11
 ; x86_64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
-; x86_64-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
+; x86_64-NEXT:    movb {{[0-9]+}}(%rsp), %r9b
 ; x86_64-NEXT:    movl %r9d, %ecx
 ; x86_64-NEXT:    shrdq %cl, %r11, %rdx
 ; x86_64-NEXT:    movl %r8d, %ecx
@@ -1230,7 +1230,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou
 ; i686-NEXT:  .LBB8_29: # %entry
 ; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; i686-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; i686-NEXT:    jne .LBB8_30
 ; i686-NEXT:  # %bb.31: # %entry
@@ -1242,7 +1242,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou
 ; i686-NEXT:  .LBB8_34: # %entry
 ; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB8_35: # %entry
-; i686-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; i686-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; i686-NEXT:    shrdl %cl, %ebx, %esi
 ; i686-NEXT:    testb $32, %cl
@@ -1327,7 +1327,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou
 ; x86_64:       # %bb.0: # %entry
 ; x86_64-NEXT:    movq %rcx, %rax
 ; x86_64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
-; x86_64-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
+; x86_64-NEXT:    movb {{[0-9]+}}(%rsp), %r9b
 ; x86_64-NEXT:    movl %r9d, %ecx
 ; x86_64-NEXT:    shldq %cl, %rdx, %rax
 ; x86_64-NEXT:    movl %r8d, %ecx

diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll
index fe52c7bece86d..84d59a33acd8b 100644
--- a/llvm/test/CodeGen/X86/shift-mask.ll
+++ b/llvm/test/CodeGen/X86/shift-mask.ll
@@ -20,7 +20,7 @@
 define i8 @test_i8_shl_lshr_0(i8 %a0) {
 ; X86-LABEL: test_i8_shl_lshr_0:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $-8, %al
 ; X86-NEXT:    retl
 ;
@@ -38,7 +38,7 @@ define i8 @test_i8_shl_lshr_0(i8 %a0) {
 define i8 @test_i8_shl_lshr_1(i8 %a0) {
 ; X86-LABEL: test_i8_shl_lshr_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $2, %al
 ; X86-NEXT:    andb $-32, %al
 ; X86-NEXT:    retl
@@ -66,7 +66,7 @@ define i8 @test_i8_shl_lshr_1(i8 %a0) {
 define i8 @test_i8_shl_lshr_2(i8 %a0) {
 ; X86-LABEL: test_i8_shl_lshr_2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb $2, %al
 ; X86-NEXT:    andb $56, %al
 ; X86-NEXT:    retl
@@ -318,7 +318,7 @@ define i64 @test_i64_shl_lshr_2(i64 %a0) {
 define i8 @test_i8_lshr_lshr_0(i8 %a0) {
 ; X86-LABEL: test_i8_lshr_lshr_0:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $31, %al
 ; X86-NEXT:    retl
 ;
@@ -336,7 +336,7 @@ define i8 @test_i8_lshr_lshr_0(i8 %a0) {
 define i8 @test_i8_lshr_lshr_1(i8 %a0) {
 ; X86-LABEL: test_i8_lshr_lshr_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shrb $2, %al
 ; X86-NEXT:    andb $7, %al
 ; X86-NEXT:    retl
@@ -364,7 +364,7 @@ define i8 @test_i8_lshr_lshr_1(i8 %a0) {
 define i8 @test_i8_lshr_lshr_2(i8 %a0) {
 ; X86-LABEL: test_i8_lshr_lshr_2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $2, %al
 ; X86-NEXT:    andb $28, %al
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/smul_fix.ll b/llvm/test/CodeGen/X86/smul_fix.ll
index 8c2b945d6a8ce..ccfc53c02e9a3 100644
--- a/llvm/test/CodeGen/X86/smul_fix.ll
+++ b/llvm/test/CodeGen/X86/smul_fix.ll
@@ -114,10 +114,10 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    sarb $4, %al
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    sarb $4, %cl
 ; X86-NEXT:    movsbl %cl, %ecx
@@ -255,10 +255,10 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func6:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    sarb $4, %al
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    sarb $4, %cl
 ; X86-NEXT:    mulb %cl

diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll
index 996601ed3be64..0463886fe2285 100644
--- a/llvm/test/CodeGen/X86/smul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll
@@ -164,10 +164,10 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    sarb $4, %al
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    movsbl %cl, %ecx
 ; X86-NEXT:    movsbl %al, %eax
@@ -475,10 +475,10 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func6:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    sarb $4, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    movb %al, %ah

diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
index 412128ba7f510..42623573a838c 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll
@@ -33,7 +33,7 @@ define i1 @test_srem_odd(i29 %X) nounwind {
 define i1 @test_srem_even(i4 %X) nounwind {
 ; X86-LABEL: test_srem_even:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    sarb $4, %cl
@@ -79,7 +79,7 @@ define i1 @test_srem_even(i4 %X) nounwind {
 define i1 @test_srem_pow2_setne(i6 %X) nounwind {
 ; X86-LABEL: test_srem_pow2_setne:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shlb $2, %cl
 ; X86-NEXT:    sarb $5, %cl
@@ -218,9 +218,9 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %dl
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %cl
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_srem_vec:

diff --git a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll
index 806b209fe66c3..9fca269ead97f 100644
--- a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll
+++ b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll
@@ -1323,7 +1323,7 @@ define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c,
 ;
 ; X86-AVX512-LABEL: add_ss_mask:
 ; X86-AVX512:       # %bb.0:
-; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-AVX512-NEXT:    kmovw %eax, %k1
 ; X86-AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-AVX512-NEXT:    vmovaps %xmm2, %xmm0
@@ -1417,7 +1417,7 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double>
 ;
 ; X86-AVX512-LABEL: add_sd_mask:
 ; X86-AVX512:       # %bb.0:
-; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-AVX512-NEXT:    kmovw %eax, %k1
 ; X86-AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-AVX512-NEXT:    vmovapd %xmm2, %xmm0

diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 348fe27616479..8a129715b4691 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -4379,7 +4379,7 @@ define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
 ;
 ; X86-AVX512-LABEL: test_mm_set1_epi8:
 ; X86-AVX512:       # %bb.0:
-; X86-AVX512-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-AVX512-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
 ; X86-AVX512-NEXT:    vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0]
 ; X86-AVX512-NEXT:    retl # encoding: [0xc3]
 ;

diff --git a/llvm/test/CodeGen/X86/sshl_sat.ll b/llvm/test/CodeGen/X86/sshl_sat.ll
index 2b87e17a0b5e3..ce89c091c77a3 100644
--- a/llvm/test/CodeGen/X86/sshl_sat.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat.ll
@@ -33,7 +33,7 @@ define i16 @func(i16 %x, i16 %y) nounwind {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    shll %cl, %esi
 ; X86-NEXT:    movswl %si, %edi
@@ -77,7 +77,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: func2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    movl %eax, %edx
@@ -179,9 +179,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    shlb $4, %dl
 ; X86-NEXT:    movb %dl, %ch
 ; X86-NEXT:    shlb %cl, %ch
@@ -225,7 +225,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %edx, %ebx
@@ -288,7 +288,7 @@ define i18 @func6(i16 %x, i16 %y) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shll $14, %edx
 ; X86-NEXT:    movl %edx, %esi
@@ -332,7 +332,7 @@ define i32 @func7(i32 %x, i32 %y) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    shll %cl, %esi
@@ -373,8 +373,8 @@ define i8 @func8(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    movb %dl, %ch
 ; X86-NEXT:    shlb %cl, %ch
 ; X86-NEXT:    movzbl %ch, %esi

diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
index 40f9025097586..a0d175fe9de93 100644
--- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
@@ -108,7 +108,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    cmpl %eax, %edi
 ; X86-NEXT:    cmovel %ebp, %edx
 ; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shll %cl, %edi
 ; X86-NEXT:    movl %edi, %ebp
 ; X86-NEXT:    sarl %cl, %ebp
@@ -120,7 +120,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    cmovel %edi, %eax
 ; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shll %cl, %edi
 ; X86-NEXT:    movl %edi, %ebp
 ; X86-NEXT:    sarl %cl, %ebp

diff --git a/llvm/test/CodeGen/X86/ssub_sat.ll b/llvm/test/CodeGen/X86/ssub_sat.ll
index 8ecc8b39ac468..be8926b5d948a 100644
--- a/llvm/test/CodeGen/X86/ssub_sat.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat.ll
@@ -99,8 +99,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
 define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    xorl %ecx, %ecx
 ; X86-NEXT:    cmpb %dl, %al
 ; X86-NEXT:    setns %cl
@@ -129,7 +129,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
 define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    subb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movzbl %al, %ecx
 ; X86-NEXT:    cmpb $7, %al

diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
index 5baf7a1dac74c..45e01e5610afd 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
@@ -105,8 +105,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
 define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    xorl %ecx, %ecx
 ; X86-NEXT:    cmpb %al, %dl
@@ -140,8 +140,8 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    sarb $4, %al

diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
index 42f346c0fd558..c00f9d0c9dd12 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -429,8 +429,8 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
 define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; SSE-LABEL: v1i8:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %eax
-; SSE-NEXT:    movzbl (%rsi), %ecx
+; SSE-NEXT:    movb (%rdi), %al
+; SSE-NEXT:    movb (%rsi), %cl
 ; SSE-NEXT:    xorl %esi, %esi
 ; SSE-NEXT:    cmpb %cl, %al
 ; SSE-NEXT:    setns %sil
@@ -443,8 +443,8 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ;
 ; AVX-LABEL: v1i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %eax
-; AVX-NEXT:    movzbl (%rsi), %ecx
+; AVX-NEXT:    movb (%rdi), %al
+; AVX-NEXT:    movb (%rsi), %cl
 ; AVX-NEXT:    xorl %esi, %esi
 ; AVX-NEXT:    cmpb %cl, %al
 ; AVX-NEXT:    setns %sil

diff --git a/llvm/test/CodeGen/X86/store-narrow.ll b/llvm/test/CodeGen/X86/store-narrow.ll
index 97e31b3fa422b..2632b56f0b325 100644
--- a/llvm/test/CodeGen/X86/store-narrow.ll
+++ b/llvm/test/CodeGen/X86/store-narrow.ll
@@ -13,19 +13,12 @@ define void @test1(ptr nocapture %a0, i8 zeroext %a1) nounwind ssp {
 ; X64-NEXT:    movb %sil, (%rdi)
 ; X64-NEXT:    retq
 ;
-; X86-BWON-LABEL: test1:
-; X86-BWON:       ## %bb.0: ## %entry
-; X86-BWON-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BWON-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BWON-NEXT:    movb %al, (%ecx)
-; X86-BWON-NEXT:    retl
-;
-; X86-BWOFF-LABEL: test1:
-; X86-BWOFF:       ## %bb.0: ## %entry
-; X86-BWOFF-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BWOFF-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BWOFF-NEXT:    movb %al, (%ecx)
-; X86-BWOFF-NEXT:    retl
+; X86-LABEL: test1:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb %al, (%ecx)
+; X86-NEXT:    retl
 entry:
   %A = load i32, ptr %a0, align 4
   %B = and i32 %A, -256     ; 0xFFFFFF00
@@ -41,19 +34,12 @@ define void @test2(ptr nocapture %a0, i8 zeroext %a1) nounwind ssp {
 ; X64-NEXT:    movb %sil, 1(%rdi)
 ; X64-NEXT:    retq
 ;
-; X86-BWON-LABEL: test2:
-; X86-BWON:       ## %bb.0: ## %entry
-; X86-BWON-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BWON-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BWON-NEXT:    movb %al, 1(%ecx)
-; X86-BWON-NEXT:    retl
-;
-; X86-BWOFF-LABEL: test2:
-; X86-BWOFF:       ## %bb.0: ## %entry
-; X86-BWOFF-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BWOFF-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BWOFF-NEXT:    movb %al, 1(%ecx)
-; X86-BWOFF-NEXT:    retl
+; X86-LABEL: test2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb %al, 1(%ecx)
+; X86-NEXT:    retl
 entry:
   %A = load i32, ptr %a0, align 4
   %B = and i32 %A, -65281    ; 0xFFFF00FF
@@ -156,19 +142,12 @@ define void @test6(ptr nocapture %a0, i8 zeroext %a1) nounwind ssp {
 ; X64-NEXT:    movb %sil, 5(%rdi)
 ; X64-NEXT:    retq
 ;
-; X86-BWON-LABEL: test6:
-; X86-BWON:       ## %bb.0: ## %entry
-; X86-BWON-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-BWON-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BWON-NEXT:    movb %al, 5(%ecx)
-; X86-BWON-NEXT:    retl
-;
-; X86-BWOFF-LABEL: test6:
-; X86-BWOFF:       ## %bb.0: ## %entry
-; X86-BWOFF-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BWOFF-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BWOFF-NEXT:    movb %al, 5(%ecx)
-; X86-BWOFF-NEXT:    retl
+; X86-LABEL: test6:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb %al, 5(%ecx)
+; X86-NEXT:    retl
 entry:
   %A = load i64, ptr %a0, align 4
   %B = and i64 %A, -280375465082881    ; 0xFFFF00FFFFFFFFFF
@@ -186,23 +165,14 @@ define i32 @test7(ptr nocapture %a0, i8 zeroext %a1, ptr %P2) nounwind {
 ; X64-NEXT:    movb %sil, 5(%rdi)
 ; X64-NEXT:    retq
 ;
-; X86-BWON-LABEL: test7:
-; X86-BWON:       ## %bb.0: ## %entry
-; X86-BWON-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BWON-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BWON-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BWON-NEXT:    movl (%eax), %eax
-; X86-BWON-NEXT:    movb %cl, 5(%edx)
-; X86-BWON-NEXT:    retl
-;
-; X86-BWOFF-LABEL: test7:
-; X86-BWOFF:       ## %bb.0: ## %entry
-; X86-BWOFF-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BWOFF-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BWOFF-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BWOFF-NEXT:    movl (%eax), %eax
-; X86-BWOFF-NEXT:    movb %cl, 5(%edx)
-; X86-BWOFF-NEXT:    retl
+; X86-LABEL: test7:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    movb %cl, 5(%edx)
+; X86-NEXT:    retl
 entry:
   %OtherLoad = load i32 , ptr%P2
   %A = load i64, ptr %a0, align 4

diff --git a/llvm/test/CodeGen/X86/sttni.ll b/llvm/test/CodeGen/X86/sttni.ll
index 7d1a6171c844a..83055fc3eebe9 100644
--- a/llvm/test/CodeGen/X86/sttni.ll
+++ b/llvm/test/CodeGen/X86/sttni.ll
@@ -70,7 +70,7 @@ define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs,
 ; X86-NEXT:  .LBB2_2: # %compare
 ; X86-NEXT:    movdqa %xmm0, (%esp)
 ; X86-NEXT:    andl $15, %ecx
-; X86-NEXT:    movzbl (%esp,%ecx), %eax
+; X86-NEXT:    movb (%esp,%ecx), %al
 ; X86-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
 ; X86-NEXT:    subb 16(%esp,%ecx), %al
 ; X86-NEXT:  .LBB2_3: # %exit
@@ -94,7 +94,7 @@ define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs,
 ; X64-NEXT:  .LBB2_2: # %compare
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl $15, %ecx
-; X64-NEXT:    movzbl -24(%rsp,%rcx), %eax
+; X64-NEXT:    movb -24(%rsp,%rcx), %al
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    subb -40(%rsp,%rcx), %al
 ; X64-NEXT:    movzbl %al, %eax
@@ -200,7 +200,7 @@ define i32 @pcmpestri_mem_diff_i8(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32
 ; X86-NEXT:  .LBB5_2: # %compare
 ; X86-NEXT:    movdqa %xmm1, (%esp)
 ; X86-NEXT:    andl $15, %ecx
-; X86-NEXT:    movzbl (%esp,%ecx), %eax
+; X86-NEXT:    movb (%esp,%ecx), %al
 ; X86-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    subb 16(%esp,%ecx), %al
 ; X86-NEXT:  .LBB5_3: # %exit
@@ -227,7 +227,7 @@ define i32 @pcmpestri_mem_diff_i8(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32
 ; X64-NEXT:  .LBB5_2: # %compare
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl $15, %ecx
-; X64-NEXT:    movzbl -24(%rsp,%rcx), %eax
+; X64-NEXT:    movb -24(%rsp,%rcx), %al
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    subb -40(%rsp,%rcx), %al
 ; X64-NEXT:    movzbl %al, %eax
@@ -559,7 +559,7 @@ define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
 ; X86-NEXT:    subl $48, %esp
 ; X86-NEXT:    movdqa %xmm0, (%esp)
 ; X86-NEXT:    andl $15, %ecx
-; X86-NEXT:    movzbl (%esp,%ecx), %eax
+; X86-NEXT:    movb (%esp,%ecx), %al
 ; X86-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
 ; X86-NEXT:    subb 16(%esp,%ecx), %al
 ; X86-NEXT:    movl %ebp, %esp
@@ -580,7 +580,7 @@ define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
 ; X64-NEXT:  .LBB14_2: # %compare
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl $15, %ecx
-; X64-NEXT:    movzbl -24(%rsp,%rcx), %eax
+; X64-NEXT:    movb -24(%rsp,%rcx), %al
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    subb -40(%rsp,%rcx), %al
 ; X64-NEXT:    movzbl %al, %eax
@@ -669,7 +669,7 @@ define i32 @pcmpistri_mem_diff_i8(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
 ; X86-NEXT:  .LBB17_2: # %compare
 ; X86-NEXT:    movdqa %xmm1, (%esp)
 ; X86-NEXT:    andl $15, %ecx
-; X86-NEXT:    movzbl (%esp,%ecx), %eax
+; X86-NEXT:    movb (%esp,%ecx), %al
 ; X86-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    subb 16(%esp,%ecx), %al
 ; X86-NEXT:  .LBB17_3: # %exit
@@ -693,7 +693,7 @@ define i32 @pcmpistri_mem_diff_i8(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
 ; X64-NEXT:  .LBB17_2: # %compare
 ; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl $15, %ecx
-; X64-NEXT:    movzbl -24(%rsp,%rcx), %eax
+; X64-NEXT:    movb -24(%rsp,%rcx), %al
 ; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    subb -40(%rsp,%rcx), %al
 ; X64-NEXT:    movzbl %al, %eax

diff --git a/llvm/test/CodeGen/X86/sub-of-not.ll b/llvm/test/CodeGen/X86/sub-of-not.ll
index 2b8f6c18ff086..08e99122ed9b7 100644
--- a/llvm/test/CodeGen/X86/sub-of-not.ll
+++ b/llvm/test/CodeGen/X86/sub-of-not.ll
@@ -12,7 +12,7 @@
 define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: scalar_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    incb %al
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll
index 88b81f12f4d67..c3ecd05c0fded 100644
--- a/llvm/test/CodeGen/X86/swifterror.ll
+++ b/llvm/test/CodeGen/X86/swifterror.ll
@@ -79,7 +79,7 @@ define float @caller(ptr %error_ref) {
 ; CHECK-APPLE-NEXT:    testq %r12, %r12
 ; CHECK-APPLE-NEXT:    jne LBB1_2
 ; CHECK-APPLE-NEXT:  ## %bb.1: ## %cont
-; CHECK-APPLE-NEXT:    movzbl 8(%rdi), %eax
+; CHECK-APPLE-NEXT:    movb 8(%rdi), %al
 ; CHECK-APPLE-NEXT:    movb %al, (%rbx)
 ; CHECK-APPLE-NEXT:  LBB1_2: ## %handler
 ; CHECK-APPLE-NEXT:    callq _free
@@ -131,7 +131,7 @@ define float @caller(ptr %error_ref) {
 ; CHECK-i386-NEXT:    jne LBB1_2
 ; CHECK-i386-NEXT:  ## %bb.1: ## %cont
 ; CHECK-i386-NEXT:    movl 16(%esp), %ecx
-; CHECK-i386-NEXT:    movzbl 8(%eax), %edx
+; CHECK-i386-NEXT:    movb 8(%eax), %dl
 ; CHECK-i386-NEXT:    movb %dl, (%ecx)
 ; CHECK-i386-NEXT:  LBB1_2: ## %handler
 ; CHECK-i386-NEXT:    movl %eax, (%esp)
@@ -182,7 +182,7 @@ define float @caller2(ptr %error_ref) {
 ; CHECK-APPLE-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-APPLE-NEXT:    jbe LBB2_1
 ; CHECK-APPLE-NEXT:  ## %bb.3: ## %bb_end
-; CHECK-APPLE-NEXT:    movzbl 8(%r12), %eax
+; CHECK-APPLE-NEXT:    movb 8(%r12), %al
 ; CHECK-APPLE-NEXT:    movb %al, (%rbx)
 ; CHECK-APPLE-NEXT:  LBB2_4: ## %handler
 ; CHECK-APPLE-NEXT:    movq %r12, %rdi
@@ -263,7 +263,7 @@ define float @caller2(ptr %error_ref) {
 ; CHECK-i386-NEXT:    sahf
 ; CHECK-i386-NEXT:    jbe LBB2_1
 ; CHECK-i386-NEXT:  ## %bb.3: ## %bb_end
-; CHECK-i386-NEXT:    movzbl 8(%ecx), %eax
+; CHECK-i386-NEXT:    movb 8(%ecx), %al
 ; CHECK-i386-NEXT:    movb %al, (%esi)
 ; CHECK-i386-NEXT:    fldz
 ; CHECK-i386-NEXT:  LBB2_4: ## %handler
@@ -632,7 +632,7 @@ define float @caller3(ptr %error_ref) {
 ; CHECK-APPLE-NEXT:    testq %r12, %r12
 ; CHECK-APPLE-NEXT:    jne LBB6_2
 ; CHECK-APPLE-NEXT:  ## %bb.1: ## %cont
-; CHECK-APPLE-NEXT:    movzbl 8(%rdi), %eax
+; CHECK-APPLE-NEXT:    movb 8(%rdi), %al
 ; CHECK-APPLE-NEXT:    movb %al, (%rbx)
 ; CHECK-APPLE-NEXT:  LBB6_2: ## %handler
 ; CHECK-APPLE-NEXT:    callq _free
@@ -689,7 +689,7 @@ define float @caller3(ptr %error_ref) {
 ; CHECK-i386-NEXT:    jne LBB6_2
 ; CHECK-i386-NEXT:  ## %bb.1: ## %cont
 ; CHECK-i386-NEXT:    movl 48(%esp), %ecx
-; CHECK-i386-NEXT:    movzbl 8(%eax), %edx
+; CHECK-i386-NEXT:    movb 8(%eax), %dl
 ; CHECK-i386-NEXT:    movb %dl, (%ecx)
 ; CHECK-i386-NEXT:  LBB6_2: ## %handler
 ; CHECK-i386-NEXT:    movl %eax, (%esp)
@@ -744,7 +744,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
 ; CHECK-APPLE-NEXT:    testq %r12, %r12
 ; CHECK-APPLE-NEXT:    jne LBB7_2
 ; CHECK-APPLE-NEXT:  ## %bb.1: ## %cont
-; CHECK-APPLE-NEXT:    movzbl 8(%rdi), %eax
+; CHECK-APPLE-NEXT:    movb 8(%rdi), %al
 ; CHECK-APPLE-NEXT:    movb %al, (%rbx)
 ; CHECK-APPLE-NEXT:  LBB7_2: ## %handler
 ; CHECK-APPLE-NEXT:    callq _free
@@ -757,7 +757,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
 ; CHECK-APPLE-NEXT:    testq %r12, %r12
 ; CHECK-APPLE-NEXT:    jne LBB7_4
 ; CHECK-APPLE-NEXT:  ## %bb.3: ## %cont2
-; CHECK-APPLE-NEXT:    movzbl 8(%rdi), %eax
+; CHECK-APPLE-NEXT:    movb 8(%rdi), %al
 ; CHECK-APPLE-NEXT:    movb %al, (%r14)
 ; CHECK-APPLE-NEXT:  LBB7_4: ## %handler2
 ; CHECK-APPLE-NEXT:    callq _free
@@ -842,7 +842,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
 ; CHECK-i386-NEXT:    jne LBB7_2
 ; CHECK-i386-NEXT:  ## %bb.1: ## %cont
 ; CHECK-i386-NEXT:    movl 8(%ebp), %ecx
-; CHECK-i386-NEXT:    movzbl 8(%eax), %edx
+; CHECK-i386-NEXT:    movb 8(%eax), %dl
 ; CHECK-i386-NEXT:    movb %dl, (%ecx)
 ; CHECK-i386-NEXT:  LBB7_2: ## %handler
 ; CHECK-i386-NEXT:    subl $12, %esp
@@ -863,7 +863,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_
 ; CHECK-i386-NEXT:    jne LBB7_4
 ; CHECK-i386-NEXT:  ## %bb.3: ## %cont2
 ; CHECK-i386-NEXT:    movl 12(%ebp), %ecx
-; CHECK-i386-NEXT:    movzbl 8(%eax), %edx
+; CHECK-i386-NEXT:    movb 8(%eax), %dl
 ; CHECK-i386-NEXT:    movb %dl, (%ecx)
 ; CHECK-i386-NEXT:  LBB7_4: ## %handler2
 ; CHECK-i386-NEXT:    subl $12, %esp

diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index 1548e2df42bed..71708e7ceb46f 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -244,7 +244,7 @@ define fastcc void @c_expand_expr_stmt(ptr %expr) nounwind {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB3_9
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movzbl 0, %ebx
+; CHECK-NEXT:    movb 0, %bl
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB3_8

diff --git a/llvm/test/CodeGen/X86/tls.ll b/llvm/test/CodeGen/X86/tls.ll
index c4ee609148af3..308d7c5b1b82f 100644
--- a/llvm/test/CodeGen/X86/tls.ll
+++ b/llvm/test/CodeGen/X86/tls.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck -check-prefix=X86_LINUX %s
 ; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
 ; RUN: llc < %s -mtriple=i386-linux-gnu -fast-isel | FileCheck -check-prefix=X86_ISEL_LINUX %s
@@ -20,48 +19,29 @@
 
 define dso_local i32 @f1() {
 ; X86_LINUX-LABEL: f1:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:i1@NTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:i1@NTPOFF, %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f1:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movl %fs:i1@TPOFF, %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f1:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:i1@NTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f1:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movl %fs:i1@TPOFF, %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movl %fs:i1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f1:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movl _i1@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f1:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movl i1@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f1:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movl _i1@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f1:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	%tmp1 = load i32, ptr @i1
@@ -70,52 +50,31 @@ entry:
 
 define dso_local ptr @f2() {
 ; X86_LINUX-LABEL: f2:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:0, %eax
-; X86_LINUX-NEXT:    leal i1@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f2:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq %fs:0, %rax
-; X64_LINUX-NEXT:    leaq i1@TPOFF(%rax), %rax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f2:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT:    leal i1@NTPOFF(%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f2:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT:    leaq i1@TPOFF(%rax), %rax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f2:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    leal _i1@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f2:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    leaq i1@SECREL32(%rax), %rax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f2:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    leal _i1@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f2:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	ret ptr @i1
@@ -123,52 +82,31 @@ entry:
 
 define dso_local i32 @f3() nounwind {
 ; X86_LINUX-LABEL: f3:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl i2@INDNTPOFF, %eax
-; X86_LINUX-NEXT:    movl %gs:(%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl i2@INDNTPOFF, %eax
+; X86_LINUX-NEXT: movl %gs:(%eax), %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f3:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq i2@GOTTPOFF(%rip), %rax
-; X64_LINUX-NEXT:    movl %fs:(%rax), %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f3:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl i2@INDNTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    movl %gs:(%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f3:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq i2@GOTTPOFF(%rip), %rax
-; X64_ISEL_LINUX-NEXT:    movl %fs:(%rax), %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: movl %fs:(%rax), %eax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f3:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movl _i2@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i2@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f3:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movl i2@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f3:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movl _i2@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f3:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i2@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	%tmp1 = load i32, ptr @i2
@@ -177,52 +115,31 @@ entry:
 
 define dso_local ptr @f4() {
 ; X86_LINUX-LABEL: f4:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:0, %eax
-; X86_LINUX-NEXT:    addl i2@INDNTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:0, %eax
+; X86_LINUX-NEXT: addl i2@INDNTPOFF, %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f4:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq %fs:0, %rax
-; X64_LINUX-NEXT:    addq i2@GOTTPOFF(%rip), %rax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f4:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT:    addl i2@INDNTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f4:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT:    addq i2@GOTTPOFF(%rip), %rax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f4:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    leal _i2@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i2@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f4:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    leaq i2@SECREL32(%rax), %rax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f4:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    leal _i2@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f4:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i2@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	ret ptr @i2
@@ -230,48 +147,29 @@ entry:
 
 define dso_local i32 @f5() nounwind {
 ; X86_LINUX-LABEL: f5:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:i3@NTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:i3@NTPOFF, %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f5:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movl %fs:i3@TPOFF, %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f5:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:i3@NTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f5:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movl %fs:i3@TPOFF, %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movl %fs:i3@TPOFF, %eax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f5:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movl _i3@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i3@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f5:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movl i3@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f5:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movl _i3@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f5:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i3@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	%tmp1 = load i32, ptr @i3
@@ -280,52 +178,31 @@ entry:
 
 define dso_local ptr @f6() {
 ; X86_LINUX-LABEL: f6:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:0, %eax
-; X86_LINUX-NEXT:    leal i3@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f6:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq %fs:0, %rax
-; X64_LINUX-NEXT:    leaq i3@TPOFF(%rax), %rax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f6:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT:    leal i3@NTPOFF(%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f6:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT:    leaq i3@TPOFF(%rax), %rax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f6:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    leal _i3@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i3@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f6:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    leaq i3@SECREL32(%rax), %rax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f6:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    leal _i3@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f6:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i3@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	ret ptr @i3
@@ -333,48 +210,17 @@ entry:
 
 define dso_local i32 @f7() {
 ; X86_LINUX-LABEL: f7:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:i4@NTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:i4@NTPOFF, %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f7:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movl %fs:i4@TPOFF, %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f7:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:i4@NTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f7:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movl %fs:i4@TPOFF, %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
-; X86_WIN-LABEL: f7:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movl _i4@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
-; X64_WIN-LABEL: f7:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movl i4@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f7:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movl _i4@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_LINUX:      movl %fs:i4@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; MINGW32-LABEL: _f7:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i4@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	%tmp1 = load i32, ptr @i4
@@ -383,52 +229,19 @@ entry:
 
 define dso_local ptr @f8() {
 ; X86_LINUX-LABEL: f8:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:0, %eax
-; X86_LINUX-NEXT:    leal i4@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f8:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq %fs:0, %rax
-; X64_LINUX-NEXT:    leaq i4@TPOFF(%rax), %rax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f8:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT:    leal i4@NTPOFF(%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f8:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT:    leaq i4@TPOFF(%rax), %rax
-; X64_ISEL_LINUX-NEXT:    retq
-;
-; X86_WIN-LABEL: f8:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    leal _i4@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
-; X64_WIN-LABEL: f8:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    leaq i4@SECREL32(%rax), %rax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f8:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    leal _i4@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; MINGW32-LABEL: _f8:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i4@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	ret ptr @i4
@@ -436,48 +249,17 @@ entry:
 
 define dso_local i32 @f9() {
 ; X86_LINUX-LABEL: f9:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:i5@NTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:i5@NTPOFF, %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f9:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movl %fs:i5@TPOFF, %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f9:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:i5@NTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f9:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movl %fs:i5@TPOFF, %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
-; X86_WIN-LABEL: f9:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movl _i5@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
-; X64_WIN-LABEL: f9:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movl i5@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f9:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movl _i5@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_LINUX:      movl %fs:i5@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; MINGW32-LABEL: _f9:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i5@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	%tmp1 = load i32, ptr @i5
@@ -486,52 +268,19 @@ entry:
 
 define dso_local ptr @f10() {
 ; X86_LINUX-LABEL: f10:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:0, %eax
-; X86_LINUX-NEXT:    leal i5@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f10:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq %fs:0, %rax
-; X64_LINUX-NEXT:    leaq i5@TPOFF(%rax), %rax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f10:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT:    leal i5@NTPOFF(%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f10:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT:    leaq i5@TPOFF(%rax), %rax
-; X64_ISEL_LINUX-NEXT:    retq
-;
-; X86_WIN-LABEL: f10:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    leal _i5@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
-; X64_WIN-LABEL: f10:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    leaq i5@SECREL32(%rax), %rax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f10:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    leal _i5@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; MINGW32-LABEL: _f10:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i5@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	ret ptr @i5
@@ -539,48 +288,29 @@ entry:
 
 define i16 @f11() {
 ; X86_LINUX-LABEL: f11:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movzwl %gs:s1@NTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movzwl %gs:s1@NTPOFF, %eax
+; X86_LINUX:      ret
 ; X64_LINUX-LABEL: f11:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movzwl %fs:s1@TPOFF, %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f11:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movzwl %gs:s1@NTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f11:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movzwl %fs:s1@TPOFF, %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movzwl %fs:s1@TPOFF, %eax
+; X64_LINUX:      ret
 ; X86_WIN-LABEL: f11:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movzwl _s1@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax
+; X86_WIN:      ret
 ; X64_WIN-LABEL: f11:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movzwl s1@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f11:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movzwl _s1@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax
+; X64_WIN:      ret
+; MINGW32-LABEL: _f11:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movzwl  _s1@SECREL32(%eax), %eax
+; MINGW32: retl
 
 entry:
 	%tmp1 = load i16, ptr @s1
@@ -589,48 +319,29 @@ entry:
 
 define dso_local i32 @f12() {
 ; X86_LINUX-LABEL: f12:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movswl %gs:s1@NTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movswl %gs:s1@NTPOFF, %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f12:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movswl %fs:s1@TPOFF, %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f12:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movswl %gs:s1@NTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f12:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movswl %fs:s1@TPOFF, %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movswl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f12:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movswl _s1@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f12:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movswl s1@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f12:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movswl _s1@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f12:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movswl _s1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 
 entry:
@@ -641,48 +352,29 @@ entry:
 
 define dso_local i8 @f13() {
 ; X86_LINUX-LABEL: f13:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movzbl %gs:b1@NTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movb %gs:b1@NTPOFF, %al
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f13:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movzbl %fs:b1@TPOFF, %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f13:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movzbl %gs:b1@NTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f13:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movzbl %fs:b1@TPOFF, %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movb %fs:b1@TPOFF, %al
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f13:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movzbl _b1@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movb _b1@SECREL32(%eax), %al
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f13:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movzbl b1@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f13:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movzbl _b1@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movb b1@SECREL32(%rax), %al
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f13:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movb _b1@SECREL32(%eax), %al
+; MINGW32-NEXT: retl
 
 entry:
 	%tmp1 = load i8, ptr @b1
@@ -691,48 +383,29 @@ entry:
 
 define dso_local i32 @f14() {
 ; X86_LINUX-LABEL: f14:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movsbl %gs:b1@NTPOFF, %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movsbl %gs:b1@NTPOFF, %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f14:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movsbl %fs:b1@TPOFF, %eax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f14:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movsbl %gs:b1@NTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f14:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movsbl %fs:b1@TPOFF, %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movsbl %fs:b1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f14:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movsbl _b1@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f14:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movsbl b1@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f14:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movsbl _b1@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f14:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movsbl  _b1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
 
 entry:
 	%tmp1 = load i8, ptr @b1
@@ -742,49 +415,28 @@ entry:
 
 define dso_local ptr @f15() {
 ; X86_LINUX-LABEL: f15:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:0, %eax
-; X86_LINUX-NEXT:    leal b2@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:0, %eax
+; X86_LINUX-NEXT: leal b2@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f15:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq %fs:0, %rax
-; X64_LINUX-NEXT:    leaq b2@TPOFF(%rax), %rax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f15:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT:    leal b2@NTPOFF(%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f15:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT:    leaq b2@TPOFF(%rax), %rax
-; X64_ISEL_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq b2@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
 ; X86_WIN-LABEL: f15:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl %fs:__tls_array, %eax
-; X86_WIN-NEXT:    movl (%eax), %eax
-; X86_WIN-NEXT:    leal _b2@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
+; X86_WIN:      movl %fs:__tls_array, %eax
+; X86_WIN-NEXT: movl (%eax), %eax
+; X86_WIN-NEXT: leal _b2@SECREL32(%eax), %eax
+; X86_WIN-NEXT: ret
 ; X64_WIN-LABEL: f15:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movq %gs:88, %rax
-; X64_WIN-NEXT:    movq (%rax), %rax
-; X64_WIN-NEXT:    leaq b2@SECREL32(%rax), %rax
-; X64_WIN-NEXT:    retq
-;
+; X64_WIN:      movq %gs:88, %rax
+; X64_WIN-NEXT: movq (%rax), %rax
+; X64_WIN-NEXT: leaq b2@SECREL32(%rax), %rax
+; X64_WIN-NEXT: ret
 ; MINGW32-LABEL: f15:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl %fs:44, %eax
-; MINGW32-NEXT:    movl (%eax), %eax
-; MINGW32-NEXT:    leal _b2@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; MINGW32:      movl %fs:44, %eax
+; MINGW32-NEXT: movl (%eax), %eax
+; MINGW32-NEXT: leal _b2@SECREL32(%eax), %eax
+; MINGW32-NEXT: ret
 entry:
 	ret ptr @b2
 }
@@ -792,53 +444,14 @@ entry:
 
 define dso_local ptr @f16() {
 ; X86_LINUX-LABEL: f16:
-; X86_LINUX:       # %bb.0:
-; X86_LINUX-NEXT:    movl %gs:0, %eax
-; X86_LINUX-NEXT:    leal i6@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
-; X64_LINUX-LABEL: f16:
-; X64_LINUX:       # %bb.0:
-; X64_LINUX-NEXT:    movq %fs:0, %rax
-; X64_LINUX-NEXT:    leaq i6@TPOFF(%rax), %rax
-; X64_LINUX-NEXT:    retq
-;
-; X86_ISEL_LINUX-LABEL: f16:
-; X86_ISEL_LINUX:       # %bb.0:
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT:    leal i6@NTPOFF(%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
-; X64_ISEL_LINUX-LABEL: f16:
-; X64_ISEL_LINUX:       # %bb.0:
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT:    leaq i6@TPOFF(%rax), %rax
-; X64_ISEL_LINUX-NEXT:    retq
-;
-; X86_WIN-LABEL: f16:
-; X86_WIN:       # %bb.0:
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    leal _i6@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
-; X64_WIN-LABEL: f16:
-; X64_WIN:       # %bb.0:
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    leaq i6@SECREL32(%rax), %rax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f16:
-; MINGW32:       # %bb.0:
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    leal _i6@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X86_LINUX:       movl %gs:0, %eax
+; X86_LINUX-NEXT:  leal i6@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT:  ret
 
+; X64_LINUX-LABEL: f16:
+; X64_LINUX:       movq %fs:0, %rax
+; X64_LINUX-NEXT:  leaq i6@TPOFF(%rax), %rax
+; X64_LINUX-NEXT:  ret
 
   ret ptr @i6
 }
@@ -846,52 +459,21 @@ define dso_local ptr @f16() {
 ; NOTE: Similar to f1() but with direct TLS segment access disabled
 define dso_local i32 @f17() #0 {
 ; X86_LINUX-LABEL: f17:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl %gs:0, %eax
-; X86_LINUX-NEXT:    movl i1@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl %gs:0, %eax
+; X86_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f17:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq %fs:0, %rax
-; X64_LINUX-NEXT:    movl i1@TPOFF(%rax), %eax
-; X64_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: movl i1@TPOFF(%rax), %eax
+; X64_LINUX-NEXT: ret
 ; X86_ISEL_LINUX-LABEL: f17:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT:    movl i1@NTPOFF(%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
+; X86_ISEL_LINUX:      movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax
+; X86_ISEL_LINUX-NEXT: ret
 ; X64_ISEL_LINUX-LABEL: f17:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT:    movl i1@TPOFF(%rax), %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
-; X86_WIN-LABEL: f17:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movl _i1@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
-; X64_WIN-LABEL: f17:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movl i1@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f17:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movl _i1@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_ISEL_LINUX:      movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: movl i1@TPOFF(%rax), %eax
+; X64_ISEL_LINUX-NEXT: ret
 
 entry:
 	%tmp1 = load i32, ptr @i1
@@ -901,56 +483,25 @@ entry:
 ; NOTE: Similar to f3() but with direct TLS segment access disabled
 define dso_local i32 @f18() #1 {
 ; X86_LINUX-LABEL: f18:
-; X86_LINUX:       # %bb.0: # %entry
-; X86_LINUX-NEXT:    movl i2@INDNTPOFF, %eax
-; X86_LINUX-NEXT:    movl %gs:0, %ecx
-; X86_LINUX-NEXT:    movl (%ecx,%eax), %eax
-; X86_LINUX-NEXT:    retl
-;
+; X86_LINUX:      movl i2@INDNTPOFF, %eax
+; X86_LINUX-NEXT: movl %gs:0, %ecx
+; X86_LINUX-NEXT: movl (%ecx,%eax), %eax
+; X86_LINUX-NEXT: ret
 ; X64_LINUX-LABEL: f18:
-; X64_LINUX:       # %bb.0: # %entry
-; X64_LINUX-NEXT:    movq i2@GOTTPOFF(%rip), %rax
-; X64_LINUX-NEXT:    movq %fs:0, %rcx
-; X64_LINUX-NEXT:    movl (%rcx,%rax), %eax
-; X64_LINUX-NEXT:    retq
-;
+; X64_LINUX:      movq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: movq %fs:0, %rcx
+; X64_LINUX-NEXT: movl (%rcx,%rax), %eax
+; X64_LINUX-NEXT: ret
 ; X86_ISEL_LINUX-LABEL: f18:
-; X86_ISEL_LINUX:       # %bb.0: # %entry
-; X86_ISEL_LINUX-NEXT:    movl i2@INDNTPOFF, %eax
-; X86_ISEL_LINUX-NEXT:    movl %gs:0, %ecx
-; X86_ISEL_LINUX-NEXT:    movl (%ecx,%eax), %eax
-; X86_ISEL_LINUX-NEXT:    retl
-;
+; X86_ISEL_LINUX:      movl i2@INDNTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %ecx
+; X86_ISEL_LINUX-NEXT: movl (%ecx,%eax), %eax
+; X86_ISEL_LINUX-NEXT: ret
 ; X64_ISEL_LINUX-LABEL: f18:
-; X64_ISEL_LINUX:       # %bb.0: # %entry
-; X64_ISEL_LINUX-NEXT:    movq i2@GOTTPOFF(%rip), %rax
-; X64_ISEL_LINUX-NEXT:    movq %fs:0, %rcx
-; X64_ISEL_LINUX-NEXT:    movl (%rcx,%rax), %eax
-; X64_ISEL_LINUX-NEXT:    retq
-;
-; X86_WIN-LABEL: f18:
-; X86_WIN:       # %bb.0: # %entry
-; X86_WIN-NEXT:    movl __tls_index, %eax
-; X86_WIN-NEXT:    movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT:    movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT:    movl _i2@SECREL32(%eax), %eax
-; X86_WIN-NEXT:    retl
-;
-; X64_WIN-LABEL: f18:
-; X64_WIN:       # %bb.0: # %entry
-; X64_WIN-NEXT:    movl _tls_index(%rip), %eax
-; X64_WIN-NEXT:    movq %gs:88, %rcx
-; X64_WIN-NEXT:    movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT:    movl i2@SECREL32(%rax), %eax
-; X64_WIN-NEXT:    retq
-;
-; MINGW32-LABEL: f18:
-; MINGW32:       # %bb.0: # %entry
-; MINGW32-NEXT:    movl __tls_index, %eax
-; MINGW32-NEXT:    movl %fs:44, %ecx
-; MINGW32-NEXT:    movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT:    movl _i2@SECREL32(%eax), %eax
-; MINGW32-NEXT:    retl
+; X64_ISEL_LINUX:      movq i2@GOTTPOFF(%rip), %rax
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rcx
+; X64_ISEL_LINUX-NEXT: movl (%rcx,%rax), %eax
+; X64_ISEL_LINUX-NEXT: ret
 
 
 entry:

diff --git a/llvm/test/CodeGen/X86/trunc-to-bool.ll b/llvm/test/CodeGen/X86/trunc-to-bool.ll
index 5a5d057597465..b0d656db34eec 100644
--- a/llvm/test/CodeGen/X86/trunc-to-bool.ll
+++ b/llvm/test/CodeGen/X86/trunc-to-bool.ll
@@ -7,7 +7,7 @@
 define zeroext i1 @test1(i32 %X)  nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK-NEXT:    andb $1, %al
 ; CHECK-NEXT:    retl
     %Y = trunc i32 %X to i1

diff --git a/llvm/test/CodeGen/X86/uadd_sat.ll b/llvm/test/CodeGen/X86/uadd_sat.ll
index 0a3c2ae344fd3..cbecdefbec260 100644
--- a/llvm/test/CodeGen/X86/uadd_sat.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat.ll
@@ -74,7 +74,7 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind {
 define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movzbl %al, %ecx
 ; X86-NEXT:    movl $255, %eax
@@ -97,7 +97,7 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
 define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movzbl %al, %ecx
 ; X86-NEXT:    cmpb $15, %al

diff --git a/llvm/test/CodeGen/X86/uadd_sat_plus.ll b/llvm/test/CodeGen/X86/uadd_sat_plus.ll
index 654e3d77f52aa..82766af1daad0 100644
--- a/llvm/test/CodeGen/X86/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_plus.ll
@@ -80,7 +80,7 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw
 define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movzbl %al, %ecx
@@ -108,7 +108,7 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
 define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    andb $15, %al
 ; X86-NEXT:    addb {{[0-9]+}}(%esp), %al

diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
index 1286f2da6405a..836f52439e4c0 100644
--- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll
@@ -429,7 +429,7 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
 define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; SSE-LABEL: v1i8:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    addb (%rsi), %al
 ; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    movl $255, %ecx
@@ -439,7 +439,7 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ;
 ; AVX-LABEL: v1i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %eax
+; AVX-NEXT:    movb (%rdi), %al
 ; AVX-NEXT:    addb (%rsi), %al
 ; AVX-NEXT:    movzbl %al, %eax
 ; AVX-NEXT:    movl $255, %ecx

diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 8d3319eb59588..eba1fd1565e1b 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -122,9 +122,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $15, %al
 ; X86-NEXT:    shlb $2, %al
 ; X86-NEXT:    movzbl %al, %eax

diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index ce3f635ab1346..33d0bd7c4f90f 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -150,9 +150,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $15, %al
 ; X86-NEXT:    shlb $2, %al
 ; X86-NEXT:    movzbl %al, %eax

diff --git a/llvm/test/CodeGen/X86/umul_fix.ll b/llvm/test/CodeGen/X86/umul_fix.ll
index cb4bdd1ede75c..fce98cc448645 100644
--- a/llvm/test/CodeGen/X86/umul_fix.ll
+++ b/llvm/test/CodeGen/X86/umul_fix.ll
@@ -85,9 +85,9 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $15, %al
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    mulb %cl
 ; X86-NEXT:    shrb $2, %al
@@ -206,9 +206,9 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func6:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $15, %al
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    mulb %cl
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 36e3749654f90..247b5ee17e7a5 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -113,9 +113,9 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    andb $15, %al
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movzbl %al, %edx
 ; X86-NEXT:    shlb $4, %cl
 ; X86-NEXT:    movzbl %cl, %eax
@@ -335,9 +335,9 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
 ;
 ; X86-LABEL: func6:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    mulb %cl
 ; X86-NEXT:    movzbl %al, %ecx

diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index d0deed539bc52..3d7544f7f6814 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -126,7 +126,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; X86-NEXT:    testl %edi, %edi
 ; X86-NEXT:    setne %bh
 ; X86-NEXT:    andb %cl, %bh
-; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
 ; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
 ; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
 ; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill

diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
index a6ca83a3a2af5..2845fc1e53080 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll
@@ -108,9 +108,9 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-BASELINE-LABEL: out_v4i8:
 ; CHECK-BASELINE:       # %bb.0:
 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
 ; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
@@ -132,9 +132,9 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-SSE1-LABEL: out_v4i8:
 ; CHECK-SSE1:       # %bb.0:
 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-SSE1-NEXT:    xorb %r9b, %sil
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
 ; CHECK-SSE1-NEXT:    xorb %r9b, %sil
@@ -175,8 +175,8 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
 ; CHECK-BASELINE-LABEL: out_v4i8_undef:
 ; CHECK-BASELINE:       # %bb.0:
 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
@@ -196,8 +196,8 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
 ; CHECK-SSE1-LABEL: out_v4i8_undef:
 ; CHECK-SSE1:       # %bb.0:
 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-SSE1-NEXT:    xorb %r9b, %sil
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
@@ -309,14 +309,14 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
 ; CHECK-BASELINE-NEXT:    pushq %r12
 ; CHECK-BASELINE-NEXT:    pushq %rbx
 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-BASELINE-NEXT:    xorb %bl, %sil
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
 ; CHECK-BASELINE-NEXT:    xorb %bl, %sil
@@ -332,15 +332,15 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %r9b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %r9b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %r11b, %bpl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %r11b, %bpl
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-BASELINE-NEXT:    xorb %r10b, %r11b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-BASELINE-NEXT:    xorb %r10b, %r11b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-BASELINE-NEXT:    xorb %dil, %bl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
 ; CHECK-BASELINE-NEXT:    xorb %dil, %bl
@@ -367,14 +367,14 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
 ; CHECK-SSE1-NEXT:    pushq %r12
 ; CHECK-SSE1-NEXT:    pushq %rbx
 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-SSE1-NEXT:    xorb %bl, %sil
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
 ; CHECK-SSE1-NEXT:    xorb %bl, %sil
@@ -390,15 +390,15 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
 ; CHECK-SSE1-NEXT:    xorb %bpl, %r9b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
 ; CHECK-SSE1-NEXT:    xorb %bpl, %r9b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-SSE1-NEXT:    xorb %r11b, %bpl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-SSE1-NEXT:    xorb %r11b, %bpl
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-SSE1-NEXT:    xorb %r10b, %r11b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-SSE1-NEXT:    xorb %r10b, %r11b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-SSE1-NEXT:    xorb %dil, %bl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
 ; CHECK-SSE1-NEXT:    xorb %dil, %bl
@@ -635,15 +635,15 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin
 ; CHECK-BASELINE-NEXT:    pushq %r12
 ; CHECK-BASELINE-NEXT:    pushq %rbx
 ; CHECK-BASELINE-NEXT:    movl %edx, %r11d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-BASELINE-NEXT:    xorb %bl, %sil
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
 ; CHECK-BASELINE-NEXT:    xorb %bl, %sil
@@ -662,54 +662,54 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %r9b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %r9b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %r12b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %r12b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %bpl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %bpl
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %sil
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %sil
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dl
 ; CHECK-BASELINE-NEXT:    xorb %r13b, %dl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
 ; CHECK-BASELINE-NEXT:    xorb %r13b, %dl
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %al, %r13b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r13b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %al, %r15b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r15b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %al, %r14b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r14b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %al, %bl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bl
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r8b
 ; CHECK-BASELINE-NEXT:    xorb %r8b, %al
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %r8b, %al
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r8b
 ; CHECK-BASELINE-NEXT:    xorb %r8b, %r10b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
 ; CHECK-BASELINE-NEXT:    xorb %r8b, %r10b
@@ -750,15 +750,15 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin
 ; CHECK-SSE1-NEXT:    pushq %r12
 ; CHECK-SSE1-NEXT:    pushq %rbx
 ; CHECK-SSE1-NEXT:    movl %edx, %r11d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-SSE1-NEXT:    xorb %bl, %sil
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
 ; CHECK-SSE1-NEXT:    xorb %bl, %sil
@@ -777,54 +777,54 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin
 ; CHECK-SSE1-NEXT:    xorb %r12b, %r9b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
 ; CHECK-SSE1-NEXT:    xorb %r12b, %r9b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
 ; CHECK-SSE1-NEXT:    xorb %bpl, %r12b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
 ; CHECK-SSE1-NEXT:    xorb %bpl, %r12b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-SSE1-NEXT:    xorb %r14b, %bpl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-SSE1-NEXT:    xorb %r14b, %bpl
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
 ; CHECK-SSE1-NEXT:    xorb %r15b, %sil
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
 ; CHECK-SSE1-NEXT:    xorb %r15b, %sil
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
 ; CHECK-SSE1-NEXT:    xorb %r13b, %dl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
 ; CHECK-SSE1-NEXT:    xorb %r13b, %dl
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %al, %r13b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
 ; CHECK-SSE1-NEXT:    xorb %al, %r13b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %al, %r15b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
 ; CHECK-SSE1-NEXT:    xorb %al, %r15b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %al, %r14b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
 ; CHECK-SSE1-NEXT:    xorb %al, %r14b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %al, %bl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
 ; CHECK-SSE1-NEXT:    xorb %al, %bl
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r8b
 ; CHECK-SSE1-NEXT:    xorb %r8b, %al
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %r8b, %al
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r8b
 ; CHECK-SSE1-NEXT:    xorb %r8b, %r10b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
 ; CHECK-SSE1-NEXT:    xorb %r8b, %r10b
@@ -1198,196 +1198,196 @@ define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
 ; CHECK-BASELINE-NEXT:    movq %rdx, %r8
 ; CHECK-BASELINE-NEXT:    movq %rsi, %r9
 ; CHECK-BASELINE-NEXT:    movq %rdi, %r11
-; CHECK-BASELINE-NEXT:    movzbl 15(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 15(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 14(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 14(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 13(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 13(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 12(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 12(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 11(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 11(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 10(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 10(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 9(%rdx), %ebp
-; CHECK-BASELINE-NEXT:    movzbl 8(%rdx), %r14d
-; CHECK-BASELINE-NEXT:    movzbl 7(%rdx), %r15d
-; CHECK-BASELINE-NEXT:    movzbl 6(%rdx), %r12d
-; CHECK-BASELINE-NEXT:    movzbl 5(%rdx), %esi
-; CHECK-BASELINE-NEXT:    movzbl 4(%rdx), %r13d
-; CHECK-BASELINE-NEXT:    movzbl 3(%rdx), %edx
-; CHECK-BASELINE-NEXT:    movzbl 2(%r8), %edi
-; CHECK-BASELINE-NEXT:    movzbl (%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 1(%r8), %ecx
-; CHECK-BASELINE-NEXT:    movzbl (%r9), %ebx
+; CHECK-BASELINE-NEXT:    movb 9(%rdx), %bpl
+; CHECK-BASELINE-NEXT:    movb 8(%rdx), %r14b
+; CHECK-BASELINE-NEXT:    movb 7(%rdx), %r15b
+; CHECK-BASELINE-NEXT:    movb 6(%rdx), %r12b
+; CHECK-BASELINE-NEXT:    movb 5(%rdx), %sil
+; CHECK-BASELINE-NEXT:    movb 4(%rdx), %r13b
+; CHECK-BASELINE-NEXT:    movb 3(%rdx), %dl
+; CHECK-BASELINE-NEXT:    movb 2(%r8), %dil
+; CHECK-BASELINE-NEXT:    movb (%r8), %al
+; CHECK-BASELINE-NEXT:    movb 1(%r8), %cl
+; CHECK-BASELINE-NEXT:    movb (%r9), %bl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bl
 ; CHECK-BASELINE-NEXT:    andb (%r10), %bl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bl
 ; CHECK-BASELINE-NEXT:    movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 1(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 1(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    andb 1(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 2(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 2(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %dil, %al
 ; CHECK-BASELINE-NEXT:    andb 2(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %dil, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 3(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 3(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %dl, %al
 ; CHECK-BASELINE-NEXT:    andb 3(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %dl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 4(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 4(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %r13b, %al
 ; CHECK-BASELINE-NEXT:    andb 4(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %r13b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 5(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 5(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %sil, %al
 ; CHECK-BASELINE-NEXT:    andb 5(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %sil, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 6(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 6(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %al
 ; CHECK-BASELINE-NEXT:    andb 6(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 7(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 7(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %al
 ; CHECK-BASELINE-NEXT:    andb 7(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 8(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 8(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %al
 ; CHECK-BASELINE-NEXT:    andb 8(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 9(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 9(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %al
 ; CHECK-BASELINE-NEXT:    andb 9(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 10(%r9), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 10(%r9), %al
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    andb 10(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 11(%r9), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 11(%r9), %al
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    andb 11(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 12(%r9), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 12(%r9), %al
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    andb 12(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 13(%r9), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 13(%r9), %al
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    andb 13(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 14(%r9), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 14(%r9), %al
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    andb 14(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 15(%r9), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 15(%r9), %al
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    andb 15(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 16(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 16(%r9), %ecx
+; CHECK-BASELINE-NEXT:    movb 16(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 16(%r9), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb 16(%r10), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 17(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 17(%r9), %ecx
+; CHECK-BASELINE-NEXT:    movb 17(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 17(%r9), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb 17(%r10), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 18(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 18(%r9), %ecx
+; CHECK-BASELINE-NEXT:    movb 18(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 18(%r9), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb 18(%r10), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 19(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 19(%r9), %ecx
+; CHECK-BASELINE-NEXT:    movb 19(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 19(%r9), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb 19(%r10), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 20(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 20(%r9), %ecx
+; CHECK-BASELINE-NEXT:    movb 20(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 20(%r9), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb 20(%r10), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 21(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 21(%r9), %r13d
+; CHECK-BASELINE-NEXT:    movb 21(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 21(%r9), %r13b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r13b
 ; CHECK-BASELINE-NEXT:    andb 21(%r10), %r13b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r13b
-; CHECK-BASELINE-NEXT:    movzbl 22(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 22(%r9), %r12d
+; CHECK-BASELINE-NEXT:    movb 22(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 22(%r9), %r12b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r12b
 ; CHECK-BASELINE-NEXT:    andb 22(%r10), %r12b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r12b
-; CHECK-BASELINE-NEXT:    movzbl 23(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 23(%r9), %r15d
+; CHECK-BASELINE-NEXT:    movb 23(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 23(%r9), %r15b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r15b
 ; CHECK-BASELINE-NEXT:    andb 23(%r10), %r15b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r15b
-; CHECK-BASELINE-NEXT:    movzbl 24(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 24(%r9), %r14d
+; CHECK-BASELINE-NEXT:    movb 24(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 24(%r9), %r14b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r14b
 ; CHECK-BASELINE-NEXT:    andb 24(%r10), %r14b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r14b
-; CHECK-BASELINE-NEXT:    movzbl 25(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 25(%r9), %ebp
+; CHECK-BASELINE-NEXT:    movb 25(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 25(%r9), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bpl
 ; CHECK-BASELINE-NEXT:    andb 25(%r10), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bpl
-; CHECK-BASELINE-NEXT:    movzbl 26(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 26(%r9), %edi
+; CHECK-BASELINE-NEXT:    movb 26(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 26(%r9), %dil
 ; CHECK-BASELINE-NEXT:    xorb %al, %dil
 ; CHECK-BASELINE-NEXT:    andb 26(%r10), %dil
 ; CHECK-BASELINE-NEXT:    xorb %al, %dil
-; CHECK-BASELINE-NEXT:    movzbl 27(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 27(%r9), %esi
+; CHECK-BASELINE-NEXT:    movb 27(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 27(%r9), %sil
 ; CHECK-BASELINE-NEXT:    xorb %al, %sil
 ; CHECK-BASELINE-NEXT:    andb 27(%r10), %sil
 ; CHECK-BASELINE-NEXT:    xorb %al, %sil
-; CHECK-BASELINE-NEXT:    movzbl 28(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 28(%r9), %edx
+; CHECK-BASELINE-NEXT:    movb 28(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 28(%r9), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 28(%r10), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
-; CHECK-BASELINE-NEXT:    movzbl 29(%r8), %eax
-; CHECK-BASELINE-NEXT:    movzbl 29(%r9), %ecx
+; CHECK-BASELINE-NEXT:    movb 29(%r8), %al
+; CHECK-BASELINE-NEXT:    movb 29(%r9), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb 29(%r10), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
-; CHECK-BASELINE-NEXT:    movzbl 30(%r8), %ebx
-; CHECK-BASELINE-NEXT:    movzbl 30(%r9), %eax
+; CHECK-BASELINE-NEXT:    movb 30(%r8), %bl
+; CHECK-BASELINE-NEXT:    movb 30(%r9), %al
 ; CHECK-BASELINE-NEXT:    xorb %bl, %al
 ; CHECK-BASELINE-NEXT:    andb 30(%r10), %al
 ; CHECK-BASELINE-NEXT:    xorb %bl, %al
-; CHECK-BASELINE-NEXT:    movzbl 31(%r8), %r8d
-; CHECK-BASELINE-NEXT:    movzbl 31(%r9), %ebx
+; CHECK-BASELINE-NEXT:    movb 31(%r8), %r8b
+; CHECK-BASELINE-NEXT:    movb 31(%r9), %bl
 ; CHECK-BASELINE-NEXT:    xorb %r8b, %bl
 ; CHECK-BASELINE-NEXT:    andb 31(%r10), %bl
 ; CHECK-BASELINE-NEXT:    xorb %r8b, %bl
@@ -1402,47 +1402,47 @@ define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
 ; CHECK-BASELINE-NEXT:    movb %r15b, 23(%r11)
 ; CHECK-BASELINE-NEXT:    movb %r12b, 22(%r11)
 ; CHECK-BASELINE-NEXT:    movb %r13b, 21(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 20(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 19(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 18(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 17(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 16(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 15(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 14(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 13(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 12(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 11(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 10(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 9(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 8(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 7(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 6(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 5(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 4(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 3(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 2(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 1(%r11)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, (%r11)
 ; CHECK-BASELINE-NEXT:    movq %r11, %rax
 ; CHECK-BASELINE-NEXT:    popq %rbx
@@ -1465,196 +1465,196 @@ define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
 ; CHECK-SSE1-NEXT:    movq %rdx, %r8
 ; CHECK-SSE1-NEXT:    movq %rsi, %r9
 ; CHECK-SSE1-NEXT:    movq %rdi, %r11
-; CHECK-SSE1-NEXT:    movzbl 15(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 15(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 14(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 14(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 13(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 13(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 12(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 12(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 11(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 11(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 10(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 10(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 9(%rdx), %ebp
-; CHECK-SSE1-NEXT:    movzbl 8(%rdx), %r14d
-; CHECK-SSE1-NEXT:    movzbl 7(%rdx), %r15d
-; CHECK-SSE1-NEXT:    movzbl 6(%rdx), %r12d
-; CHECK-SSE1-NEXT:    movzbl 5(%rdx), %esi
-; CHECK-SSE1-NEXT:    movzbl 4(%rdx), %r13d
-; CHECK-SSE1-NEXT:    movzbl 3(%rdx), %edx
-; CHECK-SSE1-NEXT:    movzbl 2(%r8), %edi
-; CHECK-SSE1-NEXT:    movzbl (%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 1(%r8), %ecx
-; CHECK-SSE1-NEXT:    movzbl (%r9), %ebx
+; CHECK-SSE1-NEXT:    movb 9(%rdx), %bpl
+; CHECK-SSE1-NEXT:    movb 8(%rdx), %r14b
+; CHECK-SSE1-NEXT:    movb 7(%rdx), %r15b
+; CHECK-SSE1-NEXT:    movb 6(%rdx), %r12b
+; CHECK-SSE1-NEXT:    movb 5(%rdx), %sil
+; CHECK-SSE1-NEXT:    movb 4(%rdx), %r13b
+; CHECK-SSE1-NEXT:    movb 3(%rdx), %dl
+; CHECK-SSE1-NEXT:    movb 2(%r8), %dil
+; CHECK-SSE1-NEXT:    movb (%r8), %al
+; CHECK-SSE1-NEXT:    movb 1(%r8), %cl
+; CHECK-SSE1-NEXT:    movb (%r9), %bl
 ; CHECK-SSE1-NEXT:    xorb %al, %bl
 ; CHECK-SSE1-NEXT:    andb (%r10), %bl
 ; CHECK-SSE1-NEXT:    xorb %al, %bl
 ; CHECK-SSE1-NEXT:    movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 1(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 1(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    andb 1(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 2(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 2(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %dil, %al
 ; CHECK-SSE1-NEXT:    andb 2(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %dil, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 3(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 3(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %dl, %al
 ; CHECK-SSE1-NEXT:    andb 3(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %dl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 4(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 4(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %r13b, %al
 ; CHECK-SSE1-NEXT:    andb 4(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %r13b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 5(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 5(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %sil, %al
 ; CHECK-SSE1-NEXT:    andb 5(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %sil, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 6(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 6(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %r12b, %al
 ; CHECK-SSE1-NEXT:    andb 6(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %r12b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 7(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 7(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %r15b, %al
 ; CHECK-SSE1-NEXT:    andb 7(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %r15b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 8(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 8(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %r14b, %al
 ; CHECK-SSE1-NEXT:    andb 8(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %r14b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 9(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 9(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %bpl, %al
 ; CHECK-SSE1-NEXT:    andb 9(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %bpl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 10(%r9), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 10(%r9), %al
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    andb 10(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 11(%r9), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 11(%r9), %al
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    andb 11(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 12(%r9), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 12(%r9), %al
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    andb 12(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 13(%r9), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 13(%r9), %al
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    andb 13(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 14(%r9), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 14(%r9), %al
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    andb 14(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 15(%r9), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 15(%r9), %al
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    andb 15(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 16(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 16(%r9), %ecx
+; CHECK-SSE1-NEXT:    movb 16(%r8), %al
+; CHECK-SSE1-NEXT:    movb 16(%r9), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb 16(%r10), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 17(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 17(%r9), %ecx
+; CHECK-SSE1-NEXT:    movb 17(%r8), %al
+; CHECK-SSE1-NEXT:    movb 17(%r9), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb 17(%r10), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 18(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 18(%r9), %ecx
+; CHECK-SSE1-NEXT:    movb 18(%r8), %al
+; CHECK-SSE1-NEXT:    movb 18(%r9), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb 18(%r10), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 19(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 19(%r9), %ecx
+; CHECK-SSE1-NEXT:    movb 19(%r8), %al
+; CHECK-SSE1-NEXT:    movb 19(%r9), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb 19(%r10), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 20(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 20(%r9), %ecx
+; CHECK-SSE1-NEXT:    movb 20(%r8), %al
+; CHECK-SSE1-NEXT:    movb 20(%r9), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb 20(%r10), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 21(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 21(%r9), %r13d
+; CHECK-SSE1-NEXT:    movb 21(%r8), %al
+; CHECK-SSE1-NEXT:    movb 21(%r9), %r13b
 ; CHECK-SSE1-NEXT:    xorb %al, %r13b
 ; CHECK-SSE1-NEXT:    andb 21(%r10), %r13b
 ; CHECK-SSE1-NEXT:    xorb %al, %r13b
-; CHECK-SSE1-NEXT:    movzbl 22(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 22(%r9), %r12d
+; CHECK-SSE1-NEXT:    movb 22(%r8), %al
+; CHECK-SSE1-NEXT:    movb 22(%r9), %r12b
 ; CHECK-SSE1-NEXT:    xorb %al, %r12b
 ; CHECK-SSE1-NEXT:    andb 22(%r10), %r12b
 ; CHECK-SSE1-NEXT:    xorb %al, %r12b
-; CHECK-SSE1-NEXT:    movzbl 23(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 23(%r9), %r15d
+; CHECK-SSE1-NEXT:    movb 23(%r8), %al
+; CHECK-SSE1-NEXT:    movb 23(%r9), %r15b
 ; CHECK-SSE1-NEXT:    xorb %al, %r15b
 ; CHECK-SSE1-NEXT:    andb 23(%r10), %r15b
 ; CHECK-SSE1-NEXT:    xorb %al, %r15b
-; CHECK-SSE1-NEXT:    movzbl 24(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 24(%r9), %r14d
+; CHECK-SSE1-NEXT:    movb 24(%r8), %al
+; CHECK-SSE1-NEXT:    movb 24(%r9), %r14b
 ; CHECK-SSE1-NEXT:    xorb %al, %r14b
 ; CHECK-SSE1-NEXT:    andb 24(%r10), %r14b
 ; CHECK-SSE1-NEXT:    xorb %al, %r14b
-; CHECK-SSE1-NEXT:    movzbl 25(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 25(%r9), %ebp
+; CHECK-SSE1-NEXT:    movb 25(%r8), %al
+; CHECK-SSE1-NEXT:    movb 25(%r9), %bpl
 ; CHECK-SSE1-NEXT:    xorb %al, %bpl
 ; CHECK-SSE1-NEXT:    andb 25(%r10), %bpl
 ; CHECK-SSE1-NEXT:    xorb %al, %bpl
-; CHECK-SSE1-NEXT:    movzbl 26(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 26(%r9), %edi
+; CHECK-SSE1-NEXT:    movb 26(%r8), %al
+; CHECK-SSE1-NEXT:    movb 26(%r9), %dil
 ; CHECK-SSE1-NEXT:    xorb %al, %dil
 ; CHECK-SSE1-NEXT:    andb 26(%r10), %dil
 ; CHECK-SSE1-NEXT:    xorb %al, %dil
-; CHECK-SSE1-NEXT:    movzbl 27(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 27(%r9), %esi
+; CHECK-SSE1-NEXT:    movb 27(%r8), %al
+; CHECK-SSE1-NEXT:    movb 27(%r9), %sil
 ; CHECK-SSE1-NEXT:    xorb %al, %sil
 ; CHECK-SSE1-NEXT:    andb 27(%r10), %sil
 ; CHECK-SSE1-NEXT:    xorb %al, %sil
-; CHECK-SSE1-NEXT:    movzbl 28(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 28(%r9), %edx
+; CHECK-SSE1-NEXT:    movb 28(%r8), %al
+; CHECK-SSE1-NEXT:    movb 28(%r9), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 28(%r10), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
-; CHECK-SSE1-NEXT:    movzbl 29(%r8), %eax
-; CHECK-SSE1-NEXT:    movzbl 29(%r9), %ecx
+; CHECK-SSE1-NEXT:    movb 29(%r8), %al
+; CHECK-SSE1-NEXT:    movb 29(%r9), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb 29(%r10), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
-; CHECK-SSE1-NEXT:    movzbl 30(%r8), %ebx
-; CHECK-SSE1-NEXT:    movzbl 30(%r9), %eax
+; CHECK-SSE1-NEXT:    movb 30(%r8), %bl
+; CHECK-SSE1-NEXT:    movb 30(%r9), %al
 ; CHECK-SSE1-NEXT:    xorb %bl, %al
 ; CHECK-SSE1-NEXT:    andb 30(%r10), %al
 ; CHECK-SSE1-NEXT:    xorb %bl, %al
-; CHECK-SSE1-NEXT:    movzbl 31(%r8), %r8d
-; CHECK-SSE1-NEXT:    movzbl 31(%r9), %ebx
+; CHECK-SSE1-NEXT:    movb 31(%r8), %r8b
+; CHECK-SSE1-NEXT:    movb 31(%r9), %bl
 ; CHECK-SSE1-NEXT:    xorb %r8b, %bl
 ; CHECK-SSE1-NEXT:    andb 31(%r10), %bl
 ; CHECK-SSE1-NEXT:    xorb %r8b, %bl
@@ -1669,47 +1669,47 @@ define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
 ; CHECK-SSE1-NEXT:    movb %r15b, 23(%r11)
 ; CHECK-SSE1-NEXT:    movb %r12b, 22(%r11)
 ; CHECK-SSE1-NEXT:    movb %r13b, 21(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 20(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 19(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 18(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 17(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 16(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 15(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 14(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 13(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 12(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 11(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 10(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 9(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 8(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 7(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 6(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 5(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 4(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 3(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 2(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 1(%r11)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, (%r11)
 ; CHECK-SSE1-NEXT:    movq %r11, %rax
 ; CHECK-SSE1-NEXT:    popq %rbx
@@ -2367,9 +2367,9 @@ define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-BASELINE-LABEL: in_v4i8:
 ; CHECK-BASELINE:       # %bb.0:
 ; CHECK-BASELINE-NEXT:    movq %rdi, %rax
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
 ; CHECK-BASELINE-NEXT:    xorb %r11b, %dl
 ; CHECK-BASELINE-NEXT:    xorb %r10b, %cl
@@ -2391,9 +2391,9 @@ define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-SSE1-LABEL: in_v4i8:
 ; CHECK-SSE1:       # %bb.0:
 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-SSE1-NEXT:    xorl %r9d, %esi
 ; CHECK-SSE1-NEXT:    xorb %r11b, %dl
 ; CHECK-SSE1-NEXT:    xorb %r10b, %cl
@@ -2500,22 +2500,22 @@ define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
 ; CHECK-BASELINE-NEXT:    pushq %r13
 ; CHECK-BASELINE-NEXT:    pushq %r12
 ; CHECK-BASELINE-NEXT:    pushq %rbx
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-BASELINE-NEXT:    xorb %r11b, %sil
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %dl
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %cl
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %r8b
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %r9b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
 ; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %r13b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-BASELINE-NEXT:    xorb {{[0-9]+}}(%rsp), %bl
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %r10b, %al
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
@@ -2558,22 +2558,22 @@ define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
 ; CHECK-SSE1-NEXT:    pushq %r13
 ; CHECK-SSE1-NEXT:    pushq %r12
 ; CHECK-SSE1-NEXT:    pushq %rbx
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-SSE1-NEXT:    xorb %r11b, %sil
 ; CHECK-SSE1-NEXT:    xorb %r12b, %dl
 ; CHECK-SSE1-NEXT:    xorb %r15b, %cl
 ; CHECK-SSE1-NEXT:    xorb %r14b, %r8b
 ; CHECK-SSE1-NEXT:    xorb %bpl, %r9b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
 ; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %r13b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-SSE1-NEXT:    xorb {{[0-9]+}}(%rsp), %bl
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %r10b, %al
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
@@ -2764,62 +2764,62 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind
 ; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; CHECK-BASELINE-NEXT:    movq %rdi, %rdx
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %sil
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; CHECK-BASELINE-NEXT:    xorb %dil, %r9b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
 ; CHECK-BASELINE-NEXT:    xorb %dil, %r9b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; CHECK-BASELINE-NEXT:    xorb %r10b, %dil
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dil
 ; CHECK-BASELINE-NEXT:    xorb %r10b, %dil
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; CHECK-BASELINE-NEXT:    xorb %r11b, %r10b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
 ; CHECK-BASELINE-NEXT:    xorb %r11b, %r10b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-BASELINE-NEXT:    xorb %bl, %r11b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-BASELINE-NEXT:    xorb %bl, %r11b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
 ; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %r13b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %r13b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %r12b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %r12b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %r15b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %r15b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %r14b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %r14b
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bpl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bpl
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %cl, %al
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; CHECK-BASELINE-NEXT:    xorb %sil, %cl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-BASELINE-NEXT:    xorb %sil, %cl
@@ -2835,24 +2835,24 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind
 ; CHECK-BASELINE-NEXT:    movb %r10b, 6(%rdx)
 ; CHECK-BASELINE-NEXT:    movb %dil, 5(%rdx)
 ; CHECK-BASELINE-NEXT:    movb %r9b, 4(%rdx)
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    xorb %al, %r8b
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r8b
 ; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rdx)
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    movb %cl, 2(%rdx)
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    movb %cl, 1(%rdx)
-; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
@@ -2879,62 +2879,62 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind
 ; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; CHECK-SSE1-NEXT:    movq %rdi, %rdx
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %sil
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; CHECK-SSE1-NEXT:    xorb %dil, %r9b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
 ; CHECK-SSE1-NEXT:    xorb %dil, %r9b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
 ; CHECK-SSE1-NEXT:    xorb %r10b, %dil
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dil
 ; CHECK-SSE1-NEXT:    xorb %r10b, %dil
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
 ; CHECK-SSE1-NEXT:    xorb %r11b, %r10b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
 ; CHECK-SSE1-NEXT:    xorb %r11b, %r10b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-SSE1-NEXT:    xorb %bl, %r11b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
 ; CHECK-SSE1-NEXT:    xorb %bl, %r11b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
 ; CHECK-SSE1-NEXT:    xorb %r13b, %bl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
 ; CHECK-SSE1-NEXT:    xorb %r13b, %bl
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
 ; CHECK-SSE1-NEXT:    xorb %r12b, %r13b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
 ; CHECK-SSE1-NEXT:    xorb %r12b, %r13b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
 ; CHECK-SSE1-NEXT:    xorb %r15b, %r12b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
 ; CHECK-SSE1-NEXT:    xorb %r15b, %r12b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
 ; CHECK-SSE1-NEXT:    xorb %r14b, %r15b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
 ; CHECK-SSE1-NEXT:    xorb %r14b, %r15b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
 ; CHECK-SSE1-NEXT:    xorb %bpl, %r14b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
 ; CHECK-SSE1-NEXT:    xorb %bpl, %r14b
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-SSE1-NEXT:    xorb %al, %bpl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
 ; CHECK-SSE1-NEXT:    xorb %al, %bpl
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %cl, %al
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
 ; CHECK-SSE1-NEXT:    xorb %sil, %cl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-SSE1-NEXT:    xorb %sil, %cl
@@ -2950,24 +2950,24 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind
 ; CHECK-SSE1-NEXT:    movb %r10b, 6(%rdx)
 ; CHECK-SSE1-NEXT:    movb %dil, 5(%rdx)
 ; CHECK-SSE1-NEXT:    movb %r9b, 4(%rdx)
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    xorb %al, %r8b
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
 ; CHECK-SSE1-NEXT:    xorb %al, %r8b
 ; CHECK-SSE1-NEXT:    movb %r8b, 3(%rdx)
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    movb %cl, 2(%rdx)
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    movb %cl, 1(%rdx)
-; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %al
 ; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
@@ -3246,194 +3246,194 @@ define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
 ; CHECK-BASELINE-NEXT:    movq %rdx, %r13
 ; CHECK-BASELINE-NEXT:    movq %rsi, %rbx
 ; CHECK-BASELINE-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 15(%rdx), %r12d
-; CHECK-BASELINE-NEXT:    movzbl 14(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 15(%rdx), %r12b
+; CHECK-BASELINE-NEXT:    movb 14(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 13(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 13(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 12(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 12(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 11(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 11(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 10(%rdx), %eax
+; CHECK-BASELINE-NEXT:    movb 10(%rdx), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 9(%rdx), %r9d
-; CHECK-BASELINE-NEXT:    movzbl 8(%rdx), %r10d
-; CHECK-BASELINE-NEXT:    movzbl 7(%rdx), %r11d
-; CHECK-BASELINE-NEXT:    movzbl 6(%rdx), %r8d
-; CHECK-BASELINE-NEXT:    movzbl 5(%rdx), %ebp
-; CHECK-BASELINE-NEXT:    movzbl 4(%rdx), %esi
-; CHECK-BASELINE-NEXT:    movzbl 3(%rdx), %edi
-; CHECK-BASELINE-NEXT:    movzbl 2(%rdx), %r14d
-; CHECK-BASELINE-NEXT:    movzbl (%rdx), %eax
-; CHECK-BASELINE-NEXT:    movzbl 1(%rdx), %r15d
-; CHECK-BASELINE-NEXT:    movzbl (%rbx), %edx
+; CHECK-BASELINE-NEXT:    movb 9(%rdx), %r9b
+; CHECK-BASELINE-NEXT:    movb 8(%rdx), %r10b
+; CHECK-BASELINE-NEXT:    movb 7(%rdx), %r11b
+; CHECK-BASELINE-NEXT:    movb 6(%rdx), %r8b
+; CHECK-BASELINE-NEXT:    movb 5(%rdx), %bpl
+; CHECK-BASELINE-NEXT:    movb 4(%rdx), %sil
+; CHECK-BASELINE-NEXT:    movb 3(%rdx), %dil
+; CHECK-BASELINE-NEXT:    movb 2(%rdx), %r14b
+; CHECK-BASELINE-NEXT:    movb (%rdx), %al
+; CHECK-BASELINE-NEXT:    movb 1(%rdx), %r15b
+; CHECK-BASELINE-NEXT:    movb (%rbx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb (%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 1(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 1(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %al
 ; CHECK-BASELINE-NEXT:    andb 1(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r15b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 2(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 2(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %al
 ; CHECK-BASELINE-NEXT:    andb 2(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r14b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 3(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 3(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %dil, %al
 ; CHECK-BASELINE-NEXT:    andb 3(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %dil, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 4(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 4(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %sil, %al
 ; CHECK-BASELINE-NEXT:    andb 4(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %sil, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 5(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 5(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %al
 ; CHECK-BASELINE-NEXT:    andb 5(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %bpl, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 6(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 6(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r8b, %al
 ; CHECK-BASELINE-NEXT:    andb 6(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r8b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 7(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 7(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r11b, %al
 ; CHECK-BASELINE-NEXT:    andb 7(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r11b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 8(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 8(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r10b, %al
 ; CHECK-BASELINE-NEXT:    andb 8(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r10b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 9(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 9(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r9b, %al
 ; CHECK-BASELINE-NEXT:    andb 9(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r9b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 10(%rbx), %edx
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 10(%rbx), %dl
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 10(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 11(%rbx), %edx
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 11(%rbx), %dl
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 11(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 12(%rbx), %edx
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 12(%rbx), %dl
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 12(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 13(%rbx), %edx
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 13(%rbx), %dl
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 13(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 14(%rbx), %edx
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb 14(%rbx), %dl
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 14(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 15(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 15(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %al
 ; CHECK-BASELINE-NEXT:    andb 15(%rcx), %al
 ; CHECK-BASELINE-NEXT:    xorb %r12b, %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 16(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 16(%rbx), %edx
+; CHECK-BASELINE-NEXT:    movb 16(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 16(%rbx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 16(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 17(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 17(%rbx), %edx
+; CHECK-BASELINE-NEXT:    movb 17(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 17(%rbx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 17(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 18(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 18(%rbx), %edx
+; CHECK-BASELINE-NEXT:    movb 18(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 18(%rbx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 18(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 19(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 19(%rbx), %r12d
+; CHECK-BASELINE-NEXT:    movb 19(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 19(%rbx), %r12b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r12b
 ; CHECK-BASELINE-NEXT:    andb 19(%rcx), %r12b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r12b
-; CHECK-BASELINE-NEXT:    movzbl 20(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 20(%rbx), %r15d
+; CHECK-BASELINE-NEXT:    movb 20(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 20(%rbx), %r15b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r15b
 ; CHECK-BASELINE-NEXT:    andb 20(%rcx), %r15b
 ; CHECK-BASELINE-NEXT:    movq %rcx, %rsi
 ; CHECK-BASELINE-NEXT:    xorb %al, %r15b
-; CHECK-BASELINE-NEXT:    movzbl 21(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 21(%rbx), %r14d
+; CHECK-BASELINE-NEXT:    movb 21(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 21(%rbx), %r14b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r14b
 ; CHECK-BASELINE-NEXT:    andb 21(%rcx), %r14b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r14b
-; CHECK-BASELINE-NEXT:    movzbl 22(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 22(%rbx), %ebp
+; CHECK-BASELINE-NEXT:    movb 22(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 22(%rbx), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bpl
 ; CHECK-BASELINE-NEXT:    andb 22(%rcx), %bpl
 ; CHECK-BASELINE-NEXT:    xorb %al, %bpl
-; CHECK-BASELINE-NEXT:    movzbl 23(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 23(%rbx), %r11d
+; CHECK-BASELINE-NEXT:    movb 23(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 23(%rbx), %r11b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r11b
 ; CHECK-BASELINE-NEXT:    andb 23(%rcx), %r11b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r11b
-; CHECK-BASELINE-NEXT:    movzbl 24(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 24(%rbx), %r10d
+; CHECK-BASELINE-NEXT:    movb 24(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 24(%rbx), %r10b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r10b
 ; CHECK-BASELINE-NEXT:    andb 24(%rcx), %r10b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r10b
-; CHECK-BASELINE-NEXT:    movzbl 25(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 25(%rbx), %r9d
+; CHECK-BASELINE-NEXT:    movb 25(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 25(%rbx), %r9b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r9b
 ; CHECK-BASELINE-NEXT:    andb 25(%rcx), %r9b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r9b
-; CHECK-BASELINE-NEXT:    movzbl 26(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 26(%rbx), %r8d
+; CHECK-BASELINE-NEXT:    movb 26(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 26(%rbx), %r8b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r8b
 ; CHECK-BASELINE-NEXT:    andb 26(%rcx), %r8b
 ; CHECK-BASELINE-NEXT:    xorb %al, %r8b
-; CHECK-BASELINE-NEXT:    movzbl 27(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 27(%rbx), %edi
+; CHECK-BASELINE-NEXT:    movb 27(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 27(%rbx), %dil
 ; CHECK-BASELINE-NEXT:    xorb %al, %dil
 ; CHECK-BASELINE-NEXT:    andb 27(%rcx), %dil
 ; CHECK-BASELINE-NEXT:    xorb %al, %dil
-; CHECK-BASELINE-NEXT:    movzbl 28(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 28(%rbx), %edx
+; CHECK-BASELINE-NEXT:    movb 28(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 28(%rbx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
 ; CHECK-BASELINE-NEXT:    andb 28(%rcx), %dl
 ; CHECK-BASELINE-NEXT:    xorb %al, %dl
-; CHECK-BASELINE-NEXT:    movzbl 29(%r13), %eax
-; CHECK-BASELINE-NEXT:    movzbl 29(%rbx), %ecx
+; CHECK-BASELINE-NEXT:    movb 29(%r13), %al
+; CHECK-BASELINE-NEXT:    movb 29(%rbx), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
 ; CHECK-BASELINE-NEXT:    andb 29(%rsi), %cl
 ; CHECK-BASELINE-NEXT:    xorb %al, %cl
-; CHECK-BASELINE-NEXT:    movzbl 30(%r13), %eax
+; CHECK-BASELINE-NEXT:    movb 30(%r13), %al
 ; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT:    movzbl 30(%rbx), %eax
+; CHECK-BASELINE-NEXT:    movb 30(%rbx), %al
 ; CHECK-BASELINE-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
 ; CHECK-BASELINE-NEXT:    andb 30(%rsi), %al
 ; CHECK-BASELINE-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
-; CHECK-BASELINE-NEXT:    movzbl 31(%r13), %r13d
-; CHECK-BASELINE-NEXT:    movzbl 31(%rbx), %ebx
+; CHECK-BASELINE-NEXT:    movb 31(%r13), %r13b
+; CHECK-BASELINE-NEXT:    movb 31(%rbx), %bl
 ; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
 ; CHECK-BASELINE-NEXT:    andb 31(%rsi), %bl
 ; CHECK-BASELINE-NEXT:    xorb %r13b, %bl
@@ -3451,43 +3451,43 @@ define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
 ; CHECK-BASELINE-NEXT:    movb %r14b, 21(%r13)
 ; CHECK-BASELINE-NEXT:    movb %r15b, 20(%r13)
 ; CHECK-BASELINE-NEXT:    movb %r12b, 19(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 18(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 17(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 16(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 15(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 14(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 13(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 12(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 11(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 10(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 9(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 8(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 7(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 6(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 5(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 4(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 3(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 2(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, 1(%r13)
-; CHECK-BASELINE-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-BASELINE-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-BASELINE-NEXT:    movb %al, (%r13)
 ; CHECK-BASELINE-NEXT:    movq %r13, %rax
 ; CHECK-BASELINE-NEXT:    popq %rbx
@@ -3509,194 +3509,194 @@ define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
 ; CHECK-SSE1-NEXT:    movq %rdx, %r13
 ; CHECK-SSE1-NEXT:    movq %rsi, %rbx
 ; CHECK-SSE1-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 15(%rdx), %r12d
-; CHECK-SSE1-NEXT:    movzbl 14(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 15(%rdx), %r12b
+; CHECK-SSE1-NEXT:    movb 14(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 13(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 13(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 12(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 12(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 11(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 11(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 10(%rdx), %eax
+; CHECK-SSE1-NEXT:    movb 10(%rdx), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 9(%rdx), %r9d
-; CHECK-SSE1-NEXT:    movzbl 8(%rdx), %r10d
-; CHECK-SSE1-NEXT:    movzbl 7(%rdx), %r11d
-; CHECK-SSE1-NEXT:    movzbl 6(%rdx), %r8d
-; CHECK-SSE1-NEXT:    movzbl 5(%rdx), %ebp
-; CHECK-SSE1-NEXT:    movzbl 4(%rdx), %esi
-; CHECK-SSE1-NEXT:    movzbl 3(%rdx), %edi
-; CHECK-SSE1-NEXT:    movzbl 2(%rdx), %r14d
-; CHECK-SSE1-NEXT:    movzbl (%rdx), %eax
-; CHECK-SSE1-NEXT:    movzbl 1(%rdx), %r15d
-; CHECK-SSE1-NEXT:    movzbl (%rbx), %edx
+; CHECK-SSE1-NEXT:    movb 9(%rdx), %r9b
+; CHECK-SSE1-NEXT:    movb 8(%rdx), %r10b
+; CHECK-SSE1-NEXT:    movb 7(%rdx), %r11b
+; CHECK-SSE1-NEXT:    movb 6(%rdx), %r8b
+; CHECK-SSE1-NEXT:    movb 5(%rdx), %bpl
+; CHECK-SSE1-NEXT:    movb 4(%rdx), %sil
+; CHECK-SSE1-NEXT:    movb 3(%rdx), %dil
+; CHECK-SSE1-NEXT:    movb 2(%rdx), %r14b
+; CHECK-SSE1-NEXT:    movb (%rdx), %al
+; CHECK-SSE1-NEXT:    movb 1(%rdx), %r15b
+; CHECK-SSE1-NEXT:    movb (%rbx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb (%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 1(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 1(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %r15b, %al
 ; CHECK-SSE1-NEXT:    andb 1(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %r15b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 2(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 2(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %r14b, %al
 ; CHECK-SSE1-NEXT:    andb 2(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %r14b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 3(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 3(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %dil, %al
 ; CHECK-SSE1-NEXT:    andb 3(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %dil, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 4(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 4(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %sil, %al
 ; CHECK-SSE1-NEXT:    andb 4(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %sil, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 5(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 5(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %bpl, %al
 ; CHECK-SSE1-NEXT:    andb 5(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %bpl, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 6(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 6(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %r8b, %al
 ; CHECK-SSE1-NEXT:    andb 6(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %r8b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 7(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 7(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %r11b, %al
 ; CHECK-SSE1-NEXT:    andb 7(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %r11b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 8(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 8(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %r10b, %al
 ; CHECK-SSE1-NEXT:    andb 8(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %r10b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 9(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 9(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %r9b, %al
 ; CHECK-SSE1-NEXT:    andb 9(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %r9b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 10(%rbx), %edx
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 10(%rbx), %dl
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 10(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 11(%rbx), %edx
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 11(%rbx), %dl
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 11(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 12(%rbx), %edx
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 12(%rbx), %dl
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 12(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 13(%rbx), %edx
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 13(%rbx), %dl
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 13(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 14(%rbx), %edx
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb 14(%rbx), %dl
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 14(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 15(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 15(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb %r12b, %al
 ; CHECK-SSE1-NEXT:    andb 15(%rcx), %al
 ; CHECK-SSE1-NEXT:    xorb %r12b, %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 16(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 16(%rbx), %edx
+; CHECK-SSE1-NEXT:    movb 16(%r13), %al
+; CHECK-SSE1-NEXT:    movb 16(%rbx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 16(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 17(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 17(%rbx), %edx
+; CHECK-SSE1-NEXT:    movb 17(%r13), %al
+; CHECK-SSE1-NEXT:    movb 17(%rbx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 17(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 18(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 18(%rbx), %edx
+; CHECK-SSE1-NEXT:    movb 18(%r13), %al
+; CHECK-SSE1-NEXT:    movb 18(%rbx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 18(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 19(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 19(%rbx), %r12d
+; CHECK-SSE1-NEXT:    movb 19(%r13), %al
+; CHECK-SSE1-NEXT:    movb 19(%rbx), %r12b
 ; CHECK-SSE1-NEXT:    xorb %al, %r12b
 ; CHECK-SSE1-NEXT:    andb 19(%rcx), %r12b
 ; CHECK-SSE1-NEXT:    xorb %al, %r12b
-; CHECK-SSE1-NEXT:    movzbl 20(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 20(%rbx), %r15d
+; CHECK-SSE1-NEXT:    movb 20(%r13), %al
+; CHECK-SSE1-NEXT:    movb 20(%rbx), %r15b
 ; CHECK-SSE1-NEXT:    xorb %al, %r15b
 ; CHECK-SSE1-NEXT:    andb 20(%rcx), %r15b
 ; CHECK-SSE1-NEXT:    movq %rcx, %rsi
 ; CHECK-SSE1-NEXT:    xorb %al, %r15b
-; CHECK-SSE1-NEXT:    movzbl 21(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 21(%rbx), %r14d
+; CHECK-SSE1-NEXT:    movb 21(%r13), %al
+; CHECK-SSE1-NEXT:    movb 21(%rbx), %r14b
 ; CHECK-SSE1-NEXT:    xorb %al, %r14b
 ; CHECK-SSE1-NEXT:    andb 21(%rcx), %r14b
 ; CHECK-SSE1-NEXT:    xorb %al, %r14b
-; CHECK-SSE1-NEXT:    movzbl 22(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 22(%rbx), %ebp
+; CHECK-SSE1-NEXT:    movb 22(%r13), %al
+; CHECK-SSE1-NEXT:    movb 22(%rbx), %bpl
 ; CHECK-SSE1-NEXT:    xorb %al, %bpl
 ; CHECK-SSE1-NEXT:    andb 22(%rcx), %bpl
 ; CHECK-SSE1-NEXT:    xorb %al, %bpl
-; CHECK-SSE1-NEXT:    movzbl 23(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 23(%rbx), %r11d
+; CHECK-SSE1-NEXT:    movb 23(%r13), %al
+; CHECK-SSE1-NEXT:    movb 23(%rbx), %r11b
 ; CHECK-SSE1-NEXT:    xorb %al, %r11b
 ; CHECK-SSE1-NEXT:    andb 23(%rcx), %r11b
 ; CHECK-SSE1-NEXT:    xorb %al, %r11b
-; CHECK-SSE1-NEXT:    movzbl 24(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 24(%rbx), %r10d
+; CHECK-SSE1-NEXT:    movb 24(%r13), %al
+; CHECK-SSE1-NEXT:    movb 24(%rbx), %r10b
 ; CHECK-SSE1-NEXT:    xorb %al, %r10b
 ; CHECK-SSE1-NEXT:    andb 24(%rcx), %r10b
 ; CHECK-SSE1-NEXT:    xorb %al, %r10b
-; CHECK-SSE1-NEXT:    movzbl 25(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 25(%rbx), %r9d
+; CHECK-SSE1-NEXT:    movb 25(%r13), %al
+; CHECK-SSE1-NEXT:    movb 25(%rbx), %r9b
 ; CHECK-SSE1-NEXT:    xorb %al, %r9b
 ; CHECK-SSE1-NEXT:    andb 25(%rcx), %r9b
 ; CHECK-SSE1-NEXT:    xorb %al, %r9b
-; CHECK-SSE1-NEXT:    movzbl 26(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 26(%rbx), %r8d
+; CHECK-SSE1-NEXT:    movb 26(%r13), %al
+; CHECK-SSE1-NEXT:    movb 26(%rbx), %r8b
 ; CHECK-SSE1-NEXT:    xorb %al, %r8b
 ; CHECK-SSE1-NEXT:    andb 26(%rcx), %r8b
 ; CHECK-SSE1-NEXT:    xorb %al, %r8b
-; CHECK-SSE1-NEXT:    movzbl 27(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 27(%rbx), %edi
+; CHECK-SSE1-NEXT:    movb 27(%r13), %al
+; CHECK-SSE1-NEXT:    movb 27(%rbx), %dil
 ; CHECK-SSE1-NEXT:    xorb %al, %dil
 ; CHECK-SSE1-NEXT:    andb 27(%rcx), %dil
 ; CHECK-SSE1-NEXT:    xorb %al, %dil
-; CHECK-SSE1-NEXT:    movzbl 28(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 28(%rbx), %edx
+; CHECK-SSE1-NEXT:    movb 28(%r13), %al
+; CHECK-SSE1-NEXT:    movb 28(%rbx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
 ; CHECK-SSE1-NEXT:    andb 28(%rcx), %dl
 ; CHECK-SSE1-NEXT:    xorb %al, %dl
-; CHECK-SSE1-NEXT:    movzbl 29(%r13), %eax
-; CHECK-SSE1-NEXT:    movzbl 29(%rbx), %ecx
+; CHECK-SSE1-NEXT:    movb 29(%r13), %al
+; CHECK-SSE1-NEXT:    movb 29(%rbx), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
 ; CHECK-SSE1-NEXT:    andb 29(%rsi), %cl
 ; CHECK-SSE1-NEXT:    xorb %al, %cl
-; CHECK-SSE1-NEXT:    movzbl 30(%r13), %eax
+; CHECK-SSE1-NEXT:    movb 30(%r13), %al
 ; CHECK-SSE1-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT:    movzbl 30(%rbx), %eax
+; CHECK-SSE1-NEXT:    movb 30(%rbx), %al
 ; CHECK-SSE1-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
 ; CHECK-SSE1-NEXT:    andb 30(%rsi), %al
 ; CHECK-SSE1-NEXT:    xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
-; CHECK-SSE1-NEXT:    movzbl 31(%r13), %r13d
-; CHECK-SSE1-NEXT:    movzbl 31(%rbx), %ebx
+; CHECK-SSE1-NEXT:    movb 31(%r13), %r13b
+; CHECK-SSE1-NEXT:    movb 31(%rbx), %bl
 ; CHECK-SSE1-NEXT:    xorb %r13b, %bl
 ; CHECK-SSE1-NEXT:    andb 31(%rsi), %bl
 ; CHECK-SSE1-NEXT:    xorb %r13b, %bl
@@ -3714,43 +3714,43 @@ define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
 ; CHECK-SSE1-NEXT:    movb %r14b, 21(%r13)
 ; CHECK-SSE1-NEXT:    movb %r15b, 20(%r13)
 ; CHECK-SSE1-NEXT:    movb %r12b, 19(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 18(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 17(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 16(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 15(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 14(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 13(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 12(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 11(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 10(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 9(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 8(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 7(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 6(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 5(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 4(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 3(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 2(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, 1(%r13)
-; CHECK-SSE1-NEXT:    movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-SSE1-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
 ; CHECK-SSE1-NEXT:    movb %al, (%r13)
 ; CHECK-SSE1-NEXT:    movq %r13, %rax
 ; CHECK-SSE1-NEXT:    popq %rbx

diff --git a/llvm/test/CodeGen/X86/urem-power-of-two.ll b/llvm/test/CodeGen/X86/urem-power-of-two.ll
index 16dddfa7e819d..60ae891a4820a 100644
--- a/llvm/test/CodeGen/X86/urem-power-of-two.ll
+++ b/llvm/test/CodeGen/X86/urem-power-of-two.ll
@@ -26,7 +26,7 @@ define i64 @const_pow_2(i64 %x) {
 define i25 @shift_left_pow_2(i25 %x, i25 %y) {
 ; X86-LABEL: shift_left_pow_2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    shll %cl, %eax
 ; X86-NEXT:    addl $33554431, %eax # imm = 0x1FFFFFF
@@ -52,7 +52,7 @@ define i25 @shift_left_pow_2(i25 %x, i25 %y) {
 define i16 @shift_right_pow_2(i16 %x, i16 %y) {
 ; X86-LABEL: shift_right_pow_2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $32768, %eax # imm = 0x8000
 ; X86-NEXT:    shrl %cl, %eax
 ; X86-NEXT:    decl %eax
@@ -80,7 +80,7 @@ define i16 @shift_right_pow_2(i16 %x, i16 %y) {
 define i8 @and_pow_2(i8 %x, i8 %y) {
 ; X86-LABEL: and_pow_2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $4, %cl
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    divb %cl

diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
index fd496780f535d..2a38afd7a782b 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -158,9 +158,9 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; SSE2-NEXT:    andps %xmm1, %xmm3
 ; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
 ; SSE2-NEXT:    movdqa %xmm3, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %dl
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %cl
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_urem_vec:

diff --git a/llvm/test/CodeGen/X86/ushl_sat.ll b/llvm/test/CodeGen/X86/ushl_sat.ll
index a4f0656c13aff..a3c4dd6e46ad9 100644
--- a/llvm/test/CodeGen/X86/ushl_sat.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat.ll
@@ -30,7 +30,7 @@ define i16 @func(i16 %x, i16 %y) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shll %cl, %edx
 ; X86-NEXT:    movzwl %dx, %edx
@@ -69,7 +69,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: func2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    movl %eax, %edx
@@ -162,9 +162,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    andb $15, %cl
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shlb %cl, %dl
@@ -201,7 +201,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %esi, %edi
@@ -255,7 +255,7 @@ define i18 @func6(i16 %x, i16 %y) nounwind {
 ; X86-LABEL: func6:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    shll $14, %eax
 ; X86-NEXT:    movl %eax, %edx
@@ -291,7 +291,7 @@ define i32 @func7(i32 %x, i32 %y) nounwind {
 ; X86-LABEL: func7:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shll %cl, %edx
@@ -325,8 +325,8 @@ define i8 @func8(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    shlb %cl, %dl
 ; X86-NEXT:    movzbl %dl, %esi

diff --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
index 871e9059c20cd..5904892e7f240 100644
--- a/llvm/test/CodeGen/X86/ushl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
@@ -67,7 +67,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
@@ -94,7 +94,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    cmpl %edi, %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-NEXT:    cmovnel %ebx, %edx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    shll %cl, %ebp
 ; X86-NEXT:    movl %ebp, %edi
 ; X86-NEXT:    shrl %cl, %edi

diff --git a/llvm/test/CodeGen/X86/usub_sat.ll b/llvm/test/CodeGen/X86/usub_sat.ll
index 6749a1f9147af..8ac20843259a2 100644
--- a/llvm/test/CodeGen/X86/usub_sat.ll
+++ b/llvm/test/CodeGen/X86/usub_sat.ll
@@ -74,7 +74,7 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind {
 define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorl %ecx, %ecx
 ; X86-NEXT:    subb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movzbl %al, %eax
@@ -97,7 +97,7 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
 define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
 ; X86-LABEL: func3:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorl %ecx, %ecx
 ; X86-NEXT:    subb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movzbl %al, %eax

diff --git a/llvm/test/CodeGen/X86/usub_sat_plus.ll b/llvm/test/CodeGen/X86/usub_sat_plus.ll
index 0fb14ad5cf7b0..e58a098265ec5 100644
--- a/llvm/test/CodeGen/X86/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_plus.ll
@@ -82,8 +82,8 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw
 define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    subb %al, %cl
@@ -111,8 +111,8 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
 define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
 ; X86-NEXT:    andb $15, %al
 ; X86-NEXT:    xorl %edx, %edx

diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
index 6a49f74f1ddbd..383d3ad24ad7c 100644
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -429,7 +429,7 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
 define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; SSE-LABEL: v1i8:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    xorl %ecx, %ecx
 ; SSE-NEXT:    subb (%rsi), %al
 ; SSE-NEXT:    movzbl %al, %eax
@@ -439,7 +439,7 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ;
 ; AVX-LABEL: v1i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzbl (%rdi), %eax
+; AVX-NEXT:    movb (%rdi), %al
 ; AVX-NEXT:    xorl %ecx, %ecx
 ; AVX-NEXT:    subb (%rsi), %al
 ; AVX-NEXT:    movzbl %al, %eax

diff --git a/llvm/test/CodeGen/X86/vec_setcc.ll b/llvm/test/CodeGen/X86/vec_setcc.ll
index 09d655ae5dce9..9f5815e1f7869 100644
--- a/llvm/test/CodeGen/X86/vec_setcc.ll
+++ b/llvm/test/CodeGen/X86/vec_setcc.ll
@@ -208,9 +208,9 @@ define <3 x i1> @test_setcc_v3i1_v3i16(ptr %a) nounwind {
 ; SSE2-NEXT:    pcmpeqw %xmm0, %xmm1
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
 ; SSE2-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %dl
+; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %cl
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_setcc_v3i1_v3i16:

diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 61980ce083c2b..c53641d422459 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1342,7 +1342,7 @@ entry:
 define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
 ; SSE-LABEL: load_sext_2i1_to_2i64:
 ; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movzbl (%rdi), %eax
+; SSE-NEXT:    movb (%rdi), %al
 ; SSE-NEXT:    movzbl %al, %ecx
 ; SSE-NEXT:    shrb %al
 ; SSE-NEXT:    movzbl %al, %eax
@@ -1356,7 +1356,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
 ;
 ; AVX1-LABEL: load_sext_2i1_to_2i64:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movzbl (%rdi), %eax
+; AVX1-NEXT:    movb (%rdi), %al
 ; AVX1-NEXT:    movzbl %al, %ecx
 ; AVX1-NEXT:    shrb %al
 ; AVX1-NEXT:    movzbl %al, %eax
@@ -1370,7 +1370,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
 ;
 ; AVX2-LABEL: load_sext_2i1_to_2i64:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movzbl (%rdi), %eax
+; AVX2-NEXT:    movb (%rdi), %al
 ; AVX2-NEXT:    movzbl %al, %ecx
 ; AVX2-NEXT:    shrb %al
 ; AVX2-NEXT:    movzbl %al, %eax
@@ -1403,7 +1403,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
 ; X86-SSE2-LABEL: load_sext_2i1_to_2i64:
 ; X86-SSE2:       # %bb.0: # %entry
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    movb (%eax), %al
 ; X86-SSE2-NEXT:    movzbl %al, %ecx
 ; X86-SSE2-NEXT:    shrb %al
 ; X86-SSE2-NEXT:    movzbl %al, %eax
@@ -1420,7 +1420,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
 ; X86-SSE41-LABEL: load_sext_2i1_to_2i64:
 ; X86-SSE41:       # %bb.0: # %entry
 ; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT:    movzbl (%eax), %eax
+; X86-SSE41-NEXT:    movb (%eax), %al
 ; X86-SSE41-NEXT:    movzbl %al, %ecx
 ; X86-SSE41-NEXT:    andl $1, %ecx
 ; X86-SSE41-NEXT:    negl %ecx
@@ -1500,7 +1500,7 @@ entry:
 define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) {
 ; SSE2-LABEL: load_sext_4i1_to_4i32:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzbl (%rdi), %eax
+; SSE2-NEXT:    movb (%rdi), %al
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    shrb $3, %cl
 ; SSE2-NEXT:    movzbl %cl, %ecx
@@ -1528,7 +1528,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) {
 ;
 ; SSSE3-LABEL: load_sext_4i1_to_4i32:
 ; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movzbl (%rdi), %eax
+; SSSE3-NEXT:    movb (%rdi), %al
 ; SSSE3-NEXT:    movl %eax, %ecx
 ; SSSE3-NEXT:    shrb $3, %cl
 ; SSSE3-NEXT:    movzbl %cl, %ecx
@@ -1556,7 +1556,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) {
 ;
 ; SSE41-LABEL: load_sext_4i1_to_4i32:
 ; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movzbl (%rdi), %eax
+; SSE41-NEXT:    movb (%rdi), %al
 ; SSE41-NEXT:    movzbl %al, %ecx
 ; SSE41-NEXT:    shrb %al
 ; SSE41-NEXT:    movzbl %al, %eax
@@ -1581,7 +1581,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) {
 ;
 ; AVX1-LABEL: load_sext_4i1_to_4i32:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movzbl (%rdi), %eax
+; AVX1-NEXT:    movb (%rdi), %al
 ; AVX1-NEXT:    movzbl %al, %ecx
 ; AVX1-NEXT:    shrb %al
 ; AVX1-NEXT:    movzbl %al, %eax
@@ -1606,7 +1606,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) {
 ;
 ; AVX2-LABEL: load_sext_4i1_to_4i32:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movzbl (%rdi), %eax
+; AVX2-NEXT:    movb (%rdi), %al
 ; AVX2-NEXT:    movzbl %al, %ecx
 ; AVX2-NEXT:    shrb %al
 ; AVX2-NEXT:    movzbl %al, %eax
@@ -1650,7 +1650,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) {
 ; X86-SSE2-LABEL: load_sext_4i1_to_4i32:
 ; X86-SSE2:       # %bb.0: # %entry
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    movb (%eax), %al
 ; X86-SSE2-NEXT:    movl %eax, %ecx
 ; X86-SSE2-NEXT:    shrb $3, %cl
 ; X86-SSE2-NEXT:    movzbl %cl, %ecx
@@ -1679,7 +1679,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) {
 ; X86-SSE41-LABEL: load_sext_4i1_to_4i32:
 ; X86-SSE41:       # %bb.0: # %entry
 ; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT:    movzbl (%eax), %eax
+; X86-SSE41-NEXT:    movb (%eax), %al
 ; X86-SSE41-NEXT:    movl %eax, %ecx
 ; X86-SSE41-NEXT:    shrb %cl
 ; X86-SSE41-NEXT:    movzbl %cl, %ecx
@@ -1757,7 +1757,7 @@ entry:
 define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
 ; SSE2-LABEL: load_sext_4i1_to_4i64:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzbl (%rdi), %eax
+; SSE2-NEXT:    movb (%rdi), %al
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    shrb %cl
 ; SSE2-NEXT:    andb $1, %cl
@@ -1787,7 +1787,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
 ;
 ; SSSE3-LABEL: load_sext_4i1_to_4i64:
 ; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movzbl (%rdi), %eax
+; SSSE3-NEXT:    movb (%rdi), %al
 ; SSSE3-NEXT:    movl %eax, %ecx
 ; SSSE3-NEXT:    shrb %cl
 ; SSSE3-NEXT:    andb $1, %cl
@@ -1817,7 +1817,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
 ;
 ; SSE41-LABEL: load_sext_4i1_to_4i64:
 ; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movzbl (%rdi), %eax
+; SSE41-NEXT:    movb (%rdi), %al
 ; SSE41-NEXT:    movl %eax, %ecx
 ; SSE41-NEXT:    shrb %cl
 ; SSE41-NEXT:    andb $1, %cl
@@ -1847,7 +1847,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
 ;
 ; AVX1-LABEL: load_sext_4i1_to_4i64:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movzbl (%rdi), %eax
+; AVX1-NEXT:    movb (%rdi), %al
 ; AVX1-NEXT:    movzbl %al, %ecx
 ; AVX1-NEXT:    shrb %al
 ; AVX1-NEXT:    movzbl %al, %eax
@@ -1876,7 +1876,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
 ;
 ; AVX2-LABEL: load_sext_4i1_to_4i64:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movzbl (%rdi), %eax
+; AVX2-NEXT:    movb (%rdi), %al
 ; AVX2-NEXT:    movl %eax, %ecx
 ; AVX2-NEXT:    shrb $3, %cl
 ; AVX2-NEXT:    movzbl %cl, %ecx
@@ -1921,7 +1921,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
 ; X86-SSE2-LABEL: load_sext_4i1_to_4i64:
 ; X86-SSE2:       # %bb.0: # %entry
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzbl (%eax), %eax
+; X86-SSE2-NEXT:    movb (%eax), %al
 ; X86-SSE2-NEXT:    movl %eax, %ecx
 ; X86-SSE2-NEXT:    shrb %cl
 ; X86-SSE2-NEXT:    andb $1, %cl
@@ -1952,7 +1952,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
 ; X86-SSE41-LABEL: load_sext_4i1_to_4i64:
 ; X86-SSE41:       # %bb.0: # %entry
 ; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT:    movzbl (%eax), %eax
+; X86-SSE41-NEXT:    movb (%eax), %al
 ; X86-SSE41-NEXT:    movl %eax, %ecx
 ; X86-SSE41-NEXT:    shrb %cl
 ; X86-SSE41-NEXT:    andb $1, %cl

diff --git a/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll b/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll
index dd61ec629c2f0..ea1dfa64c60e9 100644
--- a/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll
+++ b/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll
@@ -17,7 +17,7 @@ define dso_local void @copy_7_bytes(ptr noalias nocapture, ptr noalias nocapture
 define dso_local void @copy_7_bytes_volatile(ptr noalias nocapture, ptr noalias nocapture readonly) nounwind #0 {
 ; CHECK-LABEL: copy_7_bytes_volatile:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl 6(%rsi), %eax
+; CHECK-NEXT:    movb 6(%rsi), %al
 ; CHECK-NEXT:    movb %al, 6(%rdi)
 ; CHECK-NEXT:    movzwl 4(%rsi), %eax
 ; CHECK-NEXT:    movw %ax, 4(%rdi)
@@ -35,7 +35,7 @@ define dso_local void @move_7_bytes(ptr nocapture, ptr nocapture readonly) nounw
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl (%rsi), %eax
 ; CHECK-NEXT:    movzwl 4(%rsi), %ecx
-; CHECK-NEXT:    movzbl 6(%rsi), %edx
+; CHECK-NEXT:    movb 6(%rsi), %dl
 ; CHECK-NEXT:    movb %dl, 6(%rdi)
 ; CHECK-NEXT:    movw %cx, 4(%rdi)
 ; CHECK-NEXT:    movl %eax, (%rdi)
@@ -48,7 +48,7 @@ define dso_local void @move_7_bytes_volatile(ptr nocapture, ptr nocapture readon
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl (%rsi), %eax
 ; CHECK-NEXT:    movzwl 4(%rsi), %ecx
-; CHECK-NEXT:    movzbl 6(%rsi), %edx
+; CHECK-NEXT:    movb 6(%rsi), %dl
 ; CHECK-NEXT:    movb %dl, 6(%rdi)
 ; CHECK-NEXT:    movw %cx, 4(%rdi)
 ; CHECK-NEXT:    movl %eax, (%rdi)

diff --git a/llvm/test/CodeGen/X86/xchg-nofold.ll b/llvm/test/CodeGen/X86/xchg-nofold.ll
index 17e7781b21e0b..c41177c338468 100644
--- a/llvm/test/CodeGen/X86/xchg-nofold.ll
+++ b/llvm/test/CodeGen/X86/xchg-nofold.ll
@@ -13,7 +13,7 @@ define zeroext i1 @_Z3fooRSt6atomicIbEb(ptr nocapture dereferenceable(1) %a, i1
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    movq %rdi, %rcx
 ; CHECK-NEXT:    shrq $3, %rcx
-; CHECK-NEXT:    movzbl 2147450880(%rcx), %ecx
+; CHECK-NEXT:    movb 2147450880(%rcx), %cl
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    je .LBB0_3
 ; CHECK-NEXT:  # %bb.1:

diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index 4adc80b3b8bd6..c3cc7848fc582 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -91,7 +91,7 @@ define zeroext i1 @smuloi8(i8 %v1, i8 %v2, ptr %res) {
 ; WIN32-LABEL: smuloi8:
 ; WIN32:       # %bb.0:
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
@@ -322,7 +322,7 @@ define zeroext i1 @umuloi8(i8 %v1, i8 %v2, ptr %res) {
 ; WIN32-LABEL: umuloi8:
 ; WIN32:       # %bb.0:
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
@@ -806,7 +806,7 @@ define zeroext i1 @smulobri8(i8 %v1, i8 %v2) {
 ;
 ; WIN32-LABEL: smulobri8:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
 ; WIN32-NEXT:    jo LBB15_1
 ; WIN32-NEXT:  # %bb.2: # %continue
@@ -1122,7 +1122,7 @@ define zeroext i1 @umulobri8(i8 %v1, i8 %v2) {
 ;
 ; WIN32-LABEL: umulobri8:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
 ; WIN32-NEXT:    jo LBB19_1
 ; WIN32-NEXT:  # %bb.2: # %continue
@@ -1425,7 +1425,7 @@ define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
 ;
 ; FAST-LABEL: smuloi8_load:
 ; FAST:       # %bb.0:
-; FAST-NEXT:    movzbl (%rdi), %eax
+; FAST-NEXT:    movb (%rdi), %al
 ; FAST-NEXT:    imulb %sil
 ; FAST-NEXT:    seto %cl
 ; FAST-NEXT:    movb %al, (%rdx)
@@ -1446,7 +1446,7 @@ define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
 ; WIN32:       # %bb.0:
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT:    movzbl (%eax), %eax
+; WIN32-NEXT:    movb (%eax), %al
 ; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
@@ -1494,7 +1494,7 @@ define zeroext i1 @smuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) {
 ; WIN32-LABEL: smuloi8_load2:
 ; WIN32:       # %bb.0:
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; WIN32-NEXT:    imulb (%ecx)
 ; WIN32-NEXT:    seto %cl
@@ -1899,7 +1899,7 @@ define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
 ;
 ; FAST-LABEL: umuloi8_load:
 ; FAST:       # %bb.0:
-; FAST-NEXT:    movzbl (%rdi), %eax
+; FAST-NEXT:    movb (%rdi), %al
 ; FAST-NEXT:    mulb %sil
 ; FAST-NEXT:    seto %cl
 ; FAST-NEXT:    movb %al, (%rdx)
@@ -1920,7 +1920,7 @@ define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
 ; WIN32:       # %bb.0:
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT:    movzbl (%eax), %eax
+; WIN32-NEXT:    movb (%eax), %al
 ; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
@@ -1968,7 +1968,7 @@ define zeroext i1 @umuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) {
 ; WIN32-LABEL: umuloi8_load2:
 ; WIN32:       # %bb.0:
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; WIN32-NEXT:    mulb (%ecx)
 ; WIN32-NEXT:    seto %cl

diff --git a/llvm/test/CodeGen/X86/xor-icmp.ll b/llvm/test/CodeGen/X86/xor-icmp.ll
index d1254ada19500..08eb99f02ee7c 100644
--- a/llvm/test/CodeGen/X86/xor-icmp.ll
+++ b/llvm/test/CodeGen/X86/xor-icmp.ll
@@ -6,7 +6,7 @@
 define i32 @t(i32 %a, i32 %b) nounwind ssp {
 ; X86-LABEL: t:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    testb $64, %al
 ; X86-NEXT:    je .LBB0_1
@@ -94,7 +94,7 @@ return:                                           ; preds = %entry
 define i1 @xor_not_bools(i1 zeroext %x, i1 zeroext %y) nounwind {
 ; X86-LABEL: xor_not_bools:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorb $1, %al
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/xor-lea.ll b/llvm/test/CodeGen/X86/xor-lea.ll
index 10e9525a2706a..8e8ab5625b558 100644
--- a/llvm/test/CodeGen/X86/xor-lea.ll
+++ b/llvm/test/CodeGen/X86/xor-lea.ll
@@ -15,7 +15,7 @@
 define i8 @xor_sminval_i8(i8 %x) {
 ; X86-LABEL: xor_sminval_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb $-128, %al
 ; X86-NEXT:    retl
 ;
@@ -33,7 +33,7 @@ define i8 @xor_sminval_i8(i8 %x) {
 define i8 @xor_notsminval_i8(i8 %x) {
 ; X86-LABEL: xor_notsminval_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    xorb $127, %al
 ; X86-NEXT:    retl
 ;
@@ -122,7 +122,7 @@ define i64 @xor_sminval_i64(i64 %x) {
 define i8 @xor_add_sminval_i8(i8 %x, i8 %y) {
 ; X86-LABEL: xor_add_sminval_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb $-128, %al
 ; X86-NEXT:    retl
@@ -204,7 +204,7 @@ define i64 @xor_add_sminval_i64(i64 %x, i64 %y) {
 define i8 @sub_xor_sminval_i8(i8 %x, i8 %y) {
 ; X86-LABEL: sub_xor_sminval_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb $-128, %al
 ; X86-NEXT:    subb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
@@ -285,7 +285,7 @@ define i64 @add_xor_sminval_i64(i64 %x, i64 %y) {
 define i8 @xor_shl_sminval_i8(i8 %x) {
 ; X86-LABEL: xor_shl_sminval_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    addb %al, %al
 ; X86-NEXT:    addb $-128, %al
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/xor-with-overflow.ll b/llvm/test/CodeGen/X86/xor-with-overflow.ll
index 5d22302d39add..96533a7798bd2 100644
--- a/llvm/test/CodeGen/X86/xor-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/xor-with-overflow.ll
@@ -9,7 +9,7 @@
 define i8 @xor_i8_ri(i8 zeroext %0, i8 zeroext %1) {
 ; X86-LABEL: xor_i8_ri:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    xorb $-17, %cl
 ; X86-NEXT:    je .LBB0_2
@@ -35,8 +35,8 @@ define i8 @xor_i8_ri(i8 zeroext %0, i8 zeroext %1) {
 define i8 @xor_i8_rr(i8 zeroext %0, i8 zeroext %1) {
 ; X86-LABEL: xor_i8_rr:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    xorb %al, %cl
 ; X86-NEXT:    je .LBB1_2
 ; X86-NEXT:  # %bb.1:

diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index c392dd650da9a..eccae2885edb4 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll
@@ -192,8 +192,8 @@ bb12:
 define i8 @test6(i8 %a, i8 %b) nounwind  {
 ; X86-LABEL: test6:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    .p2align 4, 0x90
 ; X86-NEXT:  .LBB5_1: # %bb
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -394,7 +394,7 @@ define <4 x i32> @test10(<4 x i32> %a) nounwind {
 define i32 @PR17487(i1 %tobool) {
 ; X86-LABEL: PR17487:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    notb %cl
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    testb $1, %cl
@@ -425,7 +425,7 @@ define i32 @PR17487(i1 %tobool) {
 define i32 @test11(i32 %b) {
 ; X86-LABEL: test11:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl $-2, %eax
 ; X86-NEXT:    roll %cl, %eax
 ; X86-NEXT:    retl

diff --git a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
index da402d81db9fe..81a413b32c1a1 100644
--- a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
+++ b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
@@ -55,7 +55,7 @@ define i64 @test3(ptr %data) {
 ; X86-LABEL: test3:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzbl (%eax), %eax
+; X86-NEXT:    movb (%eax), %al
 ; X86-NEXT:    shlb $2, %al
 ; X86-NEXT:    xorb $60, %al
 ; X86-NEXT:    movzbl %al, %eax
@@ -64,7 +64,7 @@ define i64 @test3(ptr %data) {
 ;
 ; X64-LABEL: test3:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movzbl (%rdi), %eax
+; X64-NEXT:    movb (%rdi), %al
 ; X64-NEXT:    shlb $2, %al
 ; X64-NEXT:    xorb $60, %al
 ; X64-NEXT:    movzbl %al, %eax



