[llvm] r348604 - [DAGCombiner] disable truncation of binops by default

Sanjay Patel via llvm-commits <llvm-commits at lists.llvm.org>
Fri Dec 7 07:47:52 PST 2018


Author: spatel
Date: Fri Dec  7 07:47:52 2018
New Revision: 348604

URL: http://llvm.org/viewvc/llvm-project?rev=348604&view=rev
Log:
[DAGCombiner] disable truncation of binops by default

As discussed in the post-commit thread for r347917, this
transform fights with an existing transform, causing an
infinite loop or out-of-memory failure, so this effectively
reverts r347917 and its follow-up r348195 while we
investigate the bug.
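
For reference, here is a minimal sketch of the pattern the disabled
combine targets (shown as IR for readability; the combine itself runs
on the SelectionDAG, and the function/value names are illustrative):

  define i8 @trunc_binop(i32 %x) {
    %b = and i32 %x, 42      ; binop with one constant operand...
    %t = trunc i32 %b to i8  ; ...whose only use is this truncate
    ret i8 %t
  }

With the combine enabled (pre-legalization, single use), the truncate
is moved ahead of the binop, i.e. roughly:

    %tx = trunc i32 %x to i8
    %t  = and i8 %tx, 42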

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
    llvm/trunk/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
    llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
    llvm/trunk/test/CodeGen/X86/add-sub-nsw-nuw.ll
    llvm/trunk/test/CodeGen/X86/bool-math.ll
    llvm/trunk/test/CodeGen/X86/clear-lowbits.ll
    llvm/trunk/test/CodeGen/X86/cmov.ll
    llvm/trunk/test/CodeGen/X86/extract-bits.ll
    llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
    llvm/trunk/test/CodeGen/X86/fshl.ll
    llvm/trunk/test/CodeGen/X86/fshr.ll
    llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll
    llvm/trunk/test/CodeGen/X86/funnel-shift.ll
    llvm/trunk/test/CodeGen/X86/pr32284.ll
    llvm/trunk/test/CodeGen/X86/pr37879.ll
    llvm/trunk/test/CodeGen/X86/rot16.ll
    llvm/trunk/test/CodeGen/X86/rotate.ll
    llvm/trunk/test/CodeGen/X86/rotate4.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
    llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
    llvm/trunk/test/CodeGen/X86/test-shrink.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
    llvm/trunk/test/CodeGen/X86/xchg-nofold.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Dec  7 07:47:52 2018
@@ -112,6 +112,12 @@ static cl::opt<bool>
   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                     cl::desc("DAG combiner may split indexing from loads"));
 
+// This is a temporary debug flag to disable a combine that is known to
+// conflict with another combine.
+static cl::opt<bool>
+NarrowTruncatedBinops("narrow-truncated-binops", cl::Hidden, cl::init(false),
+                      cl::desc("Move truncates ahead of binops"));
+
 namespace {
 
   class DAGCombiner {
@@ -9804,7 +9810,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNod
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR:
-    if (!LegalOperations && N0.hasOneUse() &&
+    if (NarrowTruncatedBinops && !LegalOperations && N0.hasOneUse() &&
         (isConstantOrConstantVector(N0.getOperand(0)) ||
          isConstantOrConstantVector(N0.getOperand(1)))) {
       // TODO: We already restricted this to pre-legalization, but for vectors
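
Note that the combine is only gated, not deleted: passing the new
hidden flag to llc re-enables it for anyone investigating the conflict,
e.g. (the input file name is a placeholder):

  llc -narrow-truncated-binops < repro.ll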

Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll Fri Dec  7 07:47:52 2018
@@ -125,11 +125,11 @@ ret:
 ; GCN: s_cbranch_scc1
 
 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
+; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff
 
 ; GCN: BB2_2:
 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f
+; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f
 
 ; GCN: BB2_3:
 ; GCN: buffer_store_short

Modified: llvm/trunk/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll Fri Dec  7 07:47:52 2018
@@ -17,7 +17,7 @@ define i32 @func_44(i16 signext %p_46) n
 ; SOURCE-SCHED-NEXT:    setg %cl
 ; SOURCE-SCHED-NEXT:    movb g_73, %dl
 ; SOURCE-SCHED-NEXT:    xorl %eax, %eax
-; SOURCE-SCHED-NEXT:    subb {{[0-9]+}}(%esp), %al
+; SOURCE-SCHED-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; SOURCE-SCHED-NEXT:    testb %dl, %dl
 ; SOURCE-SCHED-NEXT:    jne .LBB0_2
 ; SOURCE-SCHED-NEXT:  # %bb.1: # %bb11

Modified: llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll Fri Dec  7 07:47:52 2018
@@ -11,7 +11,7 @@ define i32 @main() nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpq {{.*}}(%rip), %rax
-; CHECK-NEXT:    sbbb %al, %al
+; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    testb $-106, %al
 ; CHECK-NEXT:    jle .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %if.then

Modified: llvm/trunk/test/CodeGen/X86/add-sub-nsw-nuw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/add-sub-nsw-nuw.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/add-sub-nsw-nuw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/add-sub-nsw-nuw.ll Fri Dec  7 07:47:52 2018
@@ -9,7 +9,7 @@ define i8 @PR30841(i64 %argc) {
 ; CHECK-LABEL: PR30841:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    negb %al
+; CHECK-NEXT:    negl %eax
 ; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retl
 entry:

Modified: llvm/trunk/test/CodeGen/X86/bool-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bool-math.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bool-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bool-math.ll Fri Dec  7 07:47:52 2018
@@ -33,7 +33,7 @@ define i8 @sub_zext_cmp_mask_narrower_re
 ; CHECK-LABEL: sub_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    orb $46, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
@@ -77,7 +77,7 @@ define i8 @add_zext_cmp_mask_narrower_re
 ; CHECK-LABEL: add_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    xorb $43, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
@@ -159,7 +159,7 @@ define i8 @low_bit_select_constants_bigg
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    xorb $41, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/clear-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear-lowbits.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear-lowbits.ll Fri Dec  7 07:47:52 2018
@@ -866,9 +866,10 @@ define i16 @clear_lowbits16_ic0(i16 %val
 ; X86-NOBMI2-LABEL: clear_lowbits16_ic0:
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT:    movb $16, %cl
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    movw $16, %cx
+; X86-NOBMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $cx
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NOBMI2-NEXT:    retl
@@ -876,8 +877,8 @@ define i16 @clear_lowbits16_ic0(i16 %val
 ; X86-BMI2-LABEL: clear_lowbits16_ic0:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movb $16, %cl
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movw $16, %cx
+; X86-BMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -886,9 +887,10 @@ define i16 @clear_lowbits16_ic0(i16 %val
 ; X64-NOBMI2-LABEL: clear_lowbits16_ic0:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movzwl %di, %eax
-; X64-NOBMI2-NEXT:    movb $16, %cl
-; X64-NOBMI2-NEXT:    subb %sil, %cl
+; X64-NOBMI2-NEXT:    movl $16, %ecx
+; X64-NOBMI2-NEXT:    subl %esi, %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NOBMI2-NEXT:    retq
@@ -896,8 +898,8 @@ define i16 @clear_lowbits16_ic0(i16 %val
 ; X64-BMI2-LABEL: clear_lowbits16_ic0:
 ; X64-BMI2:       # %bb.0:
 ; X64-BMI2-NEXT:    movzwl %di, %eax
-; X64-BMI2-NEXT:    movb $16, %cl
-; X64-BMI2-NEXT:    subb %sil, %cl
+; X64-BMI2-NEXT:    movl $16, %ecx
+; X64-BMI2-NEXT:    subl %esi, %ecx
 ; X64-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X64-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -960,9 +962,10 @@ define i16 @clear_lowbits16_ic2_load(i16
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
-; X86-NOBMI2-NEXT:    movb $16, %cl
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    movw $16, %cx
+; X86-NOBMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $cx
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NOBMI2-NEXT:    retl
@@ -971,8 +974,8 @@ define i16 @clear_lowbits16_ic2_load(i16
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movzwl (%eax), %eax
-; X86-BMI2-NEXT:    movb $16, %cl
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movw $16, %cx
+; X86-BMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -981,9 +984,10 @@ define i16 @clear_lowbits16_ic2_load(i16
 ; X64-NOBMI2-LABEL: clear_lowbits16_ic2_load:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
-; X64-NOBMI2-NEXT:    movb $16, %cl
-; X64-NOBMI2-NEXT:    subb %sil, %cl
+; X64-NOBMI2-NEXT:    movl $16, %ecx
+; X64-NOBMI2-NEXT:    subl %esi, %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NOBMI2-NEXT:    retq
@@ -991,8 +995,8 @@ define i16 @clear_lowbits16_ic2_load(i16
 ; X64-BMI2-LABEL: clear_lowbits16_ic2_load:
 ; X64-BMI2:       # %bb.0:
 ; X64-BMI2-NEXT:    movzwl (%rdi), %eax
-; X64-BMI2-NEXT:    movb $16, %cl
-; X64-BMI2-NEXT:    subb %sil, %cl
+; X64-BMI2-NEXT:    movl $16, %ecx
+; X64-BMI2-NEXT:    subl %esi, %ecx
 ; X64-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X64-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1058,9 +1062,10 @@ define i16 @clear_lowbits16_ic4_commutat
 ; X86-NOBMI2-LABEL: clear_lowbits16_ic4_commutative:
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT:    movb $16, %cl
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    movw $16, %cx
+; X86-NOBMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $cx
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NOBMI2-NEXT:    retl
@@ -1068,8 +1073,8 @@ define i16 @clear_lowbits16_ic4_commutat
 ; X86-BMI2-LABEL: clear_lowbits16_ic4_commutative:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movb $16, %cl
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movw $16, %cx
+; X86-BMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1078,9 +1083,10 @@ define i16 @clear_lowbits16_ic4_commutat
 ; X64-NOBMI2-LABEL: clear_lowbits16_ic4_commutative:
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movzwl %di, %eax
-; X64-NOBMI2-NEXT:    movb $16, %cl
-; X64-NOBMI2-NEXT:    subb %sil, %cl
+; X64-NOBMI2-NEXT:    movl $16, %ecx
+; X64-NOBMI2-NEXT:    subl %esi, %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NOBMI2-NEXT:    retq
@@ -1088,8 +1094,8 @@ define i16 @clear_lowbits16_ic4_commutat
 ; X64-BMI2-LABEL: clear_lowbits16_ic4_commutative:
 ; X64-BMI2:       # %bb.0:
 ; X64-BMI2-NEXT:    movzwl %di, %eax
-; X64-BMI2-NEXT:    movb $16, %cl
-; X64-BMI2-NEXT:    subb %sil, %cl
+; X64-BMI2-NEXT:    movl $16, %ecx
+; X64-BMI2-NEXT:    subl %esi, %ecx
 ; X64-BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X64-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1107,7 +1113,7 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -1116,7 +1122,7 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ; X86-BMI2-LABEL: clear_lowbits32_ic0:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -1125,7 +1131,7 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
-; X64-NOBMI2-NEXT:    negb %cl
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %eax
@@ -1133,7 +1139,7 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic0:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    negl %esi
 ; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
 ; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
 ; X64-BMI2-NEXT:    retq
@@ -1191,7 +1197,7 @@ define i32 @clear_lowbits32_ic2_load(i32
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movl (%eax), %eax
 ; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -1201,7 +1207,7 @@ define i32 @clear_lowbits32_ic2_load(i32
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    retl
@@ -1210,7 +1216,7 @@ define i32 @clear_lowbits32_ic2_load(i32
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
 ; X64-NOBMI2-NEXT:    movl (%rdi), %eax
-; X64-NOBMI2-NEXT:    negb %cl
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %eax
@@ -1218,7 +1224,7 @@ define i32 @clear_lowbits32_ic2_load(i32
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic2_load:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    negl %esi
 ; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
 ; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
 ; X64-BMI2-NEXT:    retq
@@ -1279,7 +1285,7 @@ define i32 @clear_lowbits32_ic4_commutat
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -1288,7 +1294,7 @@ define i32 @clear_lowbits32_ic4_commutat
 ; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
 ; X86-BMI2-NEXT:    retl
@@ -1297,7 +1303,7 @@ define i32 @clear_lowbits32_ic4_commutat
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movl %esi, %ecx
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
-; X64-NOBMI2-NEXT:    negb %cl
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %eax
@@ -1305,7 +1311,7 @@ define i32 @clear_lowbits32_ic4_commutat
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    negl %esi
 ; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
 ; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
 ; X64-BMI2-NEXT:    retq
@@ -1320,8 +1326,8 @@ define i32 @clear_lowbits32_ic4_commutat
 define i64 @clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits64_ic0:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movb $64, %cl
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    movl $64, %ecx
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -1338,8 +1344,8 @@ define i64 @clear_lowbits64_ic0(i64 %val
 ;
 ; X86-BMI2-LABEL: clear_lowbits64_ic0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movb $64, %cl
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $64, %ecx
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
 ; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
@@ -1357,7 +1363,7 @@ define i64 @clear_lowbits64_ic0(i64 %val
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
-; X64-NOBMI2-NEXT:    negb %cl
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rax
@@ -1365,7 +1371,7 @@ define i64 @clear_lowbits64_ic0(i64 %val
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic0:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    negl %esi
 ; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
 ; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    retq
@@ -1440,8 +1446,8 @@ define i64 @clear_lowbits64_ic2_load(i64
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    pushl %esi
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT:    movb $64, %cl
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    movl $64, %ecx
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -1461,8 +1467,8 @@ define i64 @clear_lowbits64_ic2_load(i64
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT:    movb $64, %cl
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $64, %ecx
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
 ; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
@@ -1481,7 +1487,7 @@ define i64 @clear_lowbits64_ic2_load(i64
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI2-NEXT:    movq (%rdi), %rax
-; X64-NOBMI2-NEXT:    negb %cl
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rax
@@ -1489,7 +1495,7 @@ define i64 @clear_lowbits64_ic2_load(i64
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic2_load:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    negl %esi
 ; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
 ; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    retq
@@ -1570,8 +1576,8 @@ define i64 @clear_lowbits64_ic3_load_ind
 define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
 ; X86-NOBMI2:       # %bb.0:
-; X86-NOBMI2-NEXT:    movb $64, %cl
-; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT:    movl $64, %ecx
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    movl $-1, %edx
 ; X86-NOBMI2-NEXT:    movl $-1, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
@@ -1588,8 +1594,8 @@ define i64 @clear_lowbits64_ic4_commutat
 ;
 ; X86-BMI2-LABEL: clear_lowbits64_ic4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movb $64, %cl
-; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $64, %ecx
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
 ; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
@@ -1607,7 +1613,7 @@ define i64 @clear_lowbits64_ic4_commutat
 ; X64-NOBMI2:       # %bb.0:
 ; X64-NOBMI2-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
-; X64-NOBMI2-NEXT:    negb %cl
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrq %cl, %rax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rax
@@ -1615,7 +1621,7 @@ define i64 @clear_lowbits64_ic4_commutat
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    negl %esi
 ; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
 ; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmov.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmov.ll Fri Dec  7 07:47:52 2018
@@ -81,7 +81,7 @@ define i1 @test4() nounwind {
 ; CHECK-NEXT:    movsbl {{.*}}(%rip), %edx
 ; CHECK-NEXT:    movzbl %dl, %ecx
 ; CHECK-NEXT:    shrl $7, %ecx
-; CHECK-NEXT:    xorb $1, %cl
+; CHECK-NEXT:    xorl $1, %ecx
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    sarl %cl, %edx
 ; CHECK-NEXT:    movb {{.*}}(%rip), %al

Modified: llvm/trunk/test/CodeGen/X86/extract-bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-bits.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-bits.ll Fri Dec  7 07:47:52 2018
@@ -2983,7 +2983,7 @@ define i32 @bextr32_c0(i32 %val, i32 %nu
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -3005,7 +3005,7 @@ define i32 @bextr32_c0(i32 %val, i32 %nu
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
@@ -3020,22 +3020,22 @@ define i32 @bextr32_c0(i32 %val, i32 %nu
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c0:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI1BMI2-NEXT:    calll use32
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X86-BMI1BMI2-NEXT:    addl $4, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c0:
@@ -3047,7 +3047,7 @@ define i32 @bextr32_c0(i32 %val, i32 %nu
 ; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebx
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebp
@@ -3069,7 +3069,7 @@ define i32 @bextr32_c0(i32 %val, i32 %nu
 ; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    negl %edx
 ; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
@@ -3089,8 +3089,8 @@ define i32 @bextr32_c0(i32 %val, i32 %nu
 ; X64-BMI1BMI2-NEXT:    pushq %rax
 ; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl %edx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
 ; X64-BMI1BMI2-NEXT:    callq use32
@@ -3254,7 +3254,7 @@ define i32 @bextr32_c2_load(i32* %w, i32
 ; X86-NOBMI-NEXT:    movl (%eax), %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -3277,7 +3277,7 @@ define i32 @bextr32_c2_load(i32* %w, i32
 ; X86-BMI1NOTBM-NEXT:    movl (%eax), %edi
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
@@ -3292,23 +3292,23 @@ define i32 @bextr32_c2_load(i32* %w, i32
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c2_load:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, (%eax), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, (%eax), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI1BMI2-NEXT:    calll use32
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X86-BMI1BMI2-NEXT:    addl $4, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c2_load:
@@ -3320,7 +3320,7 @@ define i32 @bextr32_c2_load(i32* %w, i32
 ; X64-NOBMI-NEXT:    movl (%rdi), %ebp
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebp
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl $-1, %ebx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebx
@@ -3342,7 +3342,7 @@ define i32 @bextr32_c2_load(i32* %w, i32
 ; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebp
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    negl %edx
 ; X64-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
@@ -3362,8 +3362,8 @@ define i32 @bextr32_c2_load(i32* %w, i32
 ; X64-BMI1BMI2-NEXT:    pushq %rax
 ; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl %edx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
 ; X64-BMI1BMI2-NEXT:    callq use32
@@ -3531,7 +3531,7 @@ define i32 @bextr32_c4_commutative(i32 %
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -3553,7 +3553,7 @@ define i32 @bextr32_c4_commutative(i32 %
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
@@ -3568,22 +3568,22 @@ define i32 @bextr32_c4_commutative(i32 %
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c4_commutative:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI1BMI2-NEXT:    calll use32
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X86-BMI1BMI2-NEXT:    addl $4, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c4_commutative:
@@ -3595,7 +3595,7 @@ define i32 @bextr32_c4_commutative(i32 %
 ; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebx
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebp
@@ -3617,7 +3617,7 @@ define i32 @bextr32_c4_commutative(i32 %
 ; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    negl %edx
 ; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
@@ -3637,8 +3637,8 @@ define i32 @bextr32_c4_commutative(i32 %
 ; X64-BMI1BMI2-NEXT:    pushq %rax
 ; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl %edx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
 ; X64-BMI1BMI2-NEXT:    callq use32
@@ -3667,7 +3667,7 @@ define i32 @bextr32_c5_skipextrauses(i32
 ; X86-NOBMI-NEXT:    movl %ebx, %ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -3694,7 +3694,7 @@ define i32 @bextr32_c5_skipextrauses(i32
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ecx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
@@ -3716,16 +3716,16 @@ define i32 @bextr32_c5_skipextrauses(i32
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    subl $16, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT:    shrxl %edi, {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    shrxl %edi, {{[0-9]+}}(%esp), %ebx
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI1BMI2-NEXT:    calll use32
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %esi
+; X86-BMI1BMI2-NEXT:    bzhil %esi, %ebx, %esi
 ; X86-BMI1BMI2-NEXT:    movl %edi, (%esp)
 ; X86-BMI1BMI2-NEXT:    calll use32
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
@@ -3744,7 +3744,7 @@ define i32 @bextr32_c5_skipextrauses(i32
 ; X64-NOBMI-NEXT:    movl %edi, %ebp
 ; X64-NOBMI-NEXT:    movl %r14d, %ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebp
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl $-1, %ebx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebx
@@ -3768,7 +3768,7 @@ define i32 @bextr32_c5_skipextrauses(i32
 ; X64-BMI1NOTBM-NEXT:    movl %edi, %ebp
 ; X64-BMI1NOTBM-NEXT:    movl %r14d, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    negl %edx
 ; X64-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
@@ -3791,8 +3791,8 @@ define i32 @bextr32_c5_skipextrauses(i32
 ; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI1BMI2-NEXT:    movl %esi, %ebp
 ; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %r14d
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl %edx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
 ; X64-BMI1BMI2-NEXT:    callq use32
@@ -3835,8 +3835,8 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB32_2:
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
@@ -3882,8 +3882,8 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB32_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
@@ -3928,8 +3928,8 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB32_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
@@ -3964,7 +3964,7 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X64-NOBMI-NEXT:    movq %rdi, %r14
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %r14
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
@@ -3986,7 +3986,7 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    negl %edx
 ; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
 ; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
@@ -4007,7 +4007,7 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
 ; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r14
 ; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
 ; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
 ; X64-BMI1BMI2-NEXT:    callq use64
@@ -4257,8 +4257,8 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB34_2:
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
@@ -4305,8 +4305,8 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB34_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
@@ -4352,8 +4352,8 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB34_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
@@ -4388,7 +4388,7 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X64-NOBMI-NEXT:    movq (%rdi), %r14
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %r14
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
@@ -4410,7 +4410,7 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X64-BMI1NOTBM-NEXT:    movq (%rdi), %r14
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    negl %edx
 ; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
 ; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
@@ -4431,7 +4431,7 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
 ; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
 ; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
 ; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
 ; X64-BMI1BMI2-NEXT:    callq use64
@@ -4685,8 +4685,8 @@ define i64 @bextr64_c4_commutative(i64 %
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB36_2:
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
@@ -4732,8 +4732,8 @@ define i64 @bextr64_c4_commutative(i64 %
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB36_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
@@ -4778,8 +4778,8 @@ define i64 @bextr64_c4_commutative(i64 %
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB36_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
@@ -4814,7 +4814,7 @@ define i64 @bextr64_c4_commutative(i64 %
 ; X64-NOBMI-NEXT:    movq %rdi, %r14
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %r14
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
@@ -4836,7 +4836,7 @@ define i64 @bextr64_c4_commutative(i64 %
 ; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    negl %edx
 ; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
 ; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
@@ -4857,7 +4857,7 @@ define i64 @bextr64_c4_commutative(i64 %
 ; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
 ; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r14
 ; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
 ; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
 ; X64-BMI1BMI2-NEXT:    callq use64
@@ -4894,8 +4894,8 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB37_2:
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
 ; X86-NOBMI-NEXT:    shrl %cl, %ebp
@@ -4946,8 +4946,8 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB37_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebp
@@ -4997,8 +4997,8 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB37_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebp, %ebp
@@ -5038,7 +5038,7 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X64-NOBMI-NEXT:    movq %rdi, %r15
 ; X64-NOBMI-NEXT:    movl %r14d, %ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %r15
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
@@ -5062,7 +5062,7 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X64-BMI1NOTBM-NEXT:    movq %rdi, %r15
 ; X64-BMI1NOTBM-NEXT:    movl %r14d, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %r15
-; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    negl %edx
 ; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
 ; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
@@ -5086,7 +5086,7 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X64-BMI1BMI2-NEXT:    movq %rsi, %r14
 ; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r15
 ; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
 ; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
 ; X64-BMI1BMI2-NEXT:    callq use64
@@ -5118,7 +5118,7 @@ define i32 @bextr32_d0(i32 %val, i32 %nu
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
@@ -5126,16 +5126,16 @@ define i32 @bextr32_d0(i32 %val, i32 %nu
 ;
 ; X86-BMI1NOTBM-LABEL: bextr32_d0:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    orl %ecx, %eax
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_d0:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
@@ -5147,7 +5147,7 @@ define i32 @bextr32_d0(i32 %val, i32 %nu
 ; X64-NOBMI-NEXT:    movl %edi, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
@@ -5245,7 +5245,7 @@ define i32 @bextr32_d2_load(i32* %w, i32
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
@@ -5254,16 +5254,16 @@ define i32 @bextr32_d2_load(i32* %w, i32
 ; X86-BMI1NOTBM-LABEL: bextr32_d2_load:
 ; X86-BMI1NOTBM:       # %bb.0:
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%eax), %eax
+; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
+; X86-BMI1NOTBM-NEXT:    orl %edx, %ecx
+; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_d2_load:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
@@ -5276,7 +5276,7 @@ define i32 @bextr32_d2_load(i32* %w, i32
 ; X64-NOBMI-NEXT:    movl (%rdi), %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
@@ -5381,7 +5381,7 @@ define i32 @bextr32_d5_skipextrauses(i32
 ; X86-NOBMI-NEXT:    movl %eax, %ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -5396,7 +5396,7 @@ define i32 @bextr32_d5_skipextrauses(i32
 ; X86-BMI1NOTBM:       # %bb.0:
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
 ; X86-BMI1NOTBM-NEXT:    movzbl %al, %edx
@@ -5413,7 +5413,7 @@ define i32 @bextr32_d5_skipextrauses(i32
 ; X86-BMI1BMI2:       # %bb.0:
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %esi
@@ -5430,7 +5430,7 @@ define i32 @bextr32_d5_skipextrauses(i32
 ; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebx
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %ebx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebx
@@ -5492,8 +5492,8 @@ define i64 @bextr64_d0(i64 %val, i64 %nu
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:  .LBB43_2:
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
@@ -5540,8 +5540,8 @@ define i64 @bextr64_d0(i64 %val, i64 %nu
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
 ; X86-BMI1NOTBM-NEXT:  .LBB43_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
@@ -5586,8 +5586,8 @@ define i64 @bextr64_d0(i64 %val, i64 %nu
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
 ; X86-BMI1BMI2-NEXT:  .LBB43_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
@@ -5617,7 +5617,7 @@ define i64 @bextr64_d0(i64 %val, i64 %nu
 ; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
@@ -5838,8 +5838,8 @@ define i64 @bextr64_d2_load(i64* %w, i64
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:  .LBB45_2:
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
@@ -5887,8 +5887,8 @@ define i64 @bextr64_d2_load(i64* %w, i64
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
 ; X86-BMI1NOTBM-NEXT:  .LBB45_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
@@ -5934,8 +5934,8 @@ define i64 @bextr64_d2_load(i64* %w, i64
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
 ; X86-BMI1BMI2-NEXT:  .LBB45_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
@@ -5965,7 +5965,7 @@ define i64 @bextr64_d2_load(i64* %w, i64
 ; X64-NOBMI-NEXT:    movq (%rdi), %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
@@ -6193,8 +6193,8 @@ define i64 @bextr64_d5_skipextrauses(i64
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:  .LBB47_2:
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shldl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
@@ -6254,8 +6254,8 @@ define i64 @bextr64_d5_skipextrauses(i64
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:  .LBB47_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
@@ -6312,8 +6312,8 @@ define i64 @bextr64_d5_skipextrauses(i64
 ; X86-BMI1BMI2-NEXT:    movl %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:  .LBB47_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
@@ -6352,7 +6352,7 @@ define i64 @bextr64_d5_skipextrauses(i64
 ; X64-NOBMI-NEXT:    movq %rdi, %rbx
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
-; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    negl %edx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rbx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
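
The extract-bits deltas above all have one shape: with the combine off, the
shift-amount arithmetic (64 - %numlowbits, or its negation) is no longer
narrowed to i8, so byte ops on %cl/%dl (movb, subb, negb) revert to 32-bit
ops on %ecx/%edx (movl, subl, negl). A minimal sketch of the DAG-level
pattern the combine used to narrow (hypothetical function, not a test body):

    define i8 @narrow_demo(i32 %x) {
      %s = sub i32 64, %x          ; binop at the wide type
      %t = trunc i32 %s to i8      ; truncate consumed by a shift amount
      ret i8 %t
    }

With the flag on, the truncate was moved ahead of the sub, giving
sub i8 64, (trunc %x); with it off (the new default), the sub stays wide,
and the shift still only reads the low bits of %cl, so both forms are correct.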

Modified: llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-lowbits.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-lowbits.ll Fri Dec  7 07:47:52 2018
@@ -1436,7 +1436,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -1453,7 +1453,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
@@ -1467,18 +1467,18 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c0:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI1BMI2-NEXT:    calll use32
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    bzhil %esi, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c0:
@@ -1488,7 +1488,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movl %edi, %ebx
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebp
@@ -1508,7 +1508,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
 ; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    negl %ecx
 ; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
@@ -1528,8 +1528,8 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ; X64-BMI1BMI2-NEXT:    pushq %rax
 ; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
 ; X64-BMI1BMI2-NEXT:    movl %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl %esi, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
 ; X64-BMI1BMI2-NEXT:    callq use32
@@ -1668,7 +1668,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %edx
@@ -1687,7 +1687,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
 ; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
@@ -1705,10 +1705,9 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %esi
-; X86-BMI1BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
-; X86-BMI1BMI2-NEXT:    negb %cl
+; X86-BMI1BMI2-NEXT:    negl %ecx
 ; X86-BMI1BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
@@ -1722,7 +1721,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    pushq %rbx
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    movl $-1, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
@@ -1738,7 +1737,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 ; X64-BMI1NOTBM:       # %bb.0:
 ; X64-BMI1NOTBM-NEXT:    pushq %rbx
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    negl %ecx
 ; X64-BMI1NOTBM-NEXT:    movl $-1, %eax
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %eax
@@ -1754,8 +1753,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 ; X64-BMI1BMI2:       # %bb.0:
 ; X64-BMI1BMI2-NEXT:    pushq %rbx
 ; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
-; X64-BMI1BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
-; X64-BMI1BMI2-NEXT:    negb %sil
+; X64-BMI1BMI2-NEXT:    negl %esi
 ; X64-BMI1BMI2-NEXT:    movl $-1, %eax
 ; X64-BMI1BMI2-NEXT:    shrxl %esi, %eax, %edi
 ; X64-BMI1BMI2-NEXT:    callq use32
@@ -1886,7 +1884,7 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
@@ -1903,7 +1901,7 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
@@ -1917,18 +1915,18 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI1BMI2-NEXT:    calll use32
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    bzhil %esi, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c4_commutative:
@@ -1938,7 +1936,7 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movl %edi, %ebx
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebp
@@ -1958,7 +1956,7 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
 ; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    negl %ecx
 ; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
@@ -1978,8 +1976,8 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ; X64-BMI1BMI2-NEXT:    pushq %rax
 ; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
 ; X64-BMI1BMI2-NEXT:    movl %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl %esi, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
 ; X64-BMI1BMI2-NEXT:    callq use32
@@ -2005,8 +2003,8 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    movl $-1, %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
@@ -2036,8 +2034,8 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
 ; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
@@ -2067,8 +2065,8 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    movl $-1, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
@@ -2099,7 +2097,7 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 ; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq %rdi, %r14
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
@@ -2119,7 +2117,7 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 ; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
 ; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    negl %ecx
 ; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
@@ -2140,7 +2138,7 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 ; X64-BMI1BMI2-NEXT:    movq %rsi, %rbx
 ; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
 ; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
 ; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
 ; X64-BMI1BMI2-NEXT:    callq use64
@@ -2320,26 +2318,26 @@ define i64 @bzhi64_c2_load(i64* %w, i64
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-1, %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB27_2
 ; X86-NOBMI-NEXT:  # %bb.1:
-; X86-NOBMI-NEXT:    movl %ebx, %eax
+; X86-NOBMI-NEXT:    movl %ebx, %edx
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:  .LBB27_2:
-; X86-NOBMI-NEXT:    movl (%edx), %esi
-; X86-NOBMI-NEXT:    andl %eax, %esi
-; X86-NOBMI-NEXT:    movl 4(%edx), %edi
+; X86-NOBMI-NEXT:    movl (%eax), %esi
+; X86-NOBMI-NEXT:    andl %edx, %esi
+; X86-NOBMI-NEXT:    movl 4(%eax), %edi
 ; X86-NOBMI-NEXT:    andl %ebx, %edi
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
-; X86-NOBMI-NEXT:    pushl %eax
+; X86-NOBMI-NEXT:    pushl %edx
 ; X86-NOBMI-NEXT:    calll use64
 ; X86-NOBMI-NEXT:    addl $16, %esp
 ; X86-NOBMI-NEXT:    movl %esi, %eax
@@ -2354,26 +2352,26 @@ define i64 @bzhi64_c2_load(i64* %w, i64
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB27_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %edx
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:  .LBB27_2:
-; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
-; X86-BMI1NOTBM-NEXT:    andl %eax, %esi
-; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
+; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
+; X86-BMI1NOTBM-NEXT:    andl %edx, %esi
+; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %edi
 ; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %eax
+; X86-BMI1NOTBM-NEXT:    pushl %edx
 ; X86-BMI1NOTBM-NEXT:    calll use64
 ; X86-BMI1NOTBM-NEXT:    addl $16, %esp
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
@@ -2388,25 +2386,25 @@ define i64 @bzhi64_c2_load(i64* %w, i64
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %ebx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT:    movl $-1, %edx
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %ebx
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB27_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI1BMI2-NEXT:    movl %ebx, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:  .LBB27_2:
-; X86-BMI1BMI2-NEXT:    movl (%edx), %esi
-; X86-BMI1BMI2-NEXT:    andl %eax, %esi
-; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edi
+; X86-BMI1BMI2-NEXT:    movl (%eax), %esi
+; X86-BMI1BMI2-NEXT:    andl %edx, %esi
+; X86-BMI1BMI2-NEXT:    movl 4(%eax), %edi
 ; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    pushl %edx
 ; X86-BMI1BMI2-NEXT:    calll use64
 ; X86-BMI1BMI2-NEXT:    addl $16, %esp
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
@@ -2420,7 +2418,7 @@ define i64 @bzhi64_c2_load(i64* %w, i64
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    pushq %rbx
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    movq $-1, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
@@ -2436,7 +2434,7 @@ define i64 @bzhi64_c2_load(i64* %w, i64
 ; X64-BMI1NOTBM:       # %bb.0:
 ; X64-BMI1NOTBM-NEXT:    pushq %rbx
 ; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    negl %ecx
 ; X64-BMI1NOTBM-NEXT:    movq $-1, %rax
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
@@ -2452,8 +2450,8 @@ define i64 @bzhi64_c2_load(i64* %w, i64
 ; X64-BMI1BMI2:       # %bb.0:
 ; X64-BMI1BMI2-NEXT:    pushq %rbx
 ; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
-; X64-BMI1BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
-; X64-BMI1BMI2-NEXT:    negb %sil
+; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi killed $rsi def $rsi
+; X64-BMI1BMI2-NEXT:    negl %esi
 ; X64-BMI1BMI2-NEXT:    movq $-1, %rax
 ; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rax, %rdi
 ; X64-BMI1BMI2-NEXT:    callq use64
@@ -2630,8 +2628,8 @@ define i64 @bzhi64_c4_commutative(i64 %v
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    pushl %eax
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    movl $-1, %edi
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
@@ -2661,8 +2659,8 @@ define i64 @bzhi64_c4_commutative(i64 %v
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
 ; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
@@ -2692,8 +2690,8 @@ define i64 @bzhi64_c4_commutative(i64 %v
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    movl $-1, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
@@ -2724,7 +2722,7 @@ define i64 @bzhi64_c4_commutative(i64 %v
 ; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq %rdi, %r14
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
@@ -2744,7 +2742,7 @@ define i64 @bzhi64_c4_commutative(i64 %v
 ; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
 ; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    negl %ecx
 ; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
@@ -2765,7 +2763,7 @@ define i64 @bzhi64_c4_commutative(i64 %v
 ; X64-BMI1BMI2-NEXT:    movq %rsi, %rbx
 ; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
 ; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    negl %eax
 ; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
 ; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
 ; X64-BMI1BMI2-NEXT:    callq use64
@@ -2790,7 +2788,7 @@ define i32 @bzhi32_d0(i32 %val, i32 %num
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
@@ -2798,14 +2796,14 @@ define i32 @bzhi32_d0(i32 %val, i32 %num
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi32_d0:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_d0:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
@@ -2813,7 +2811,7 @@ define i32 @bzhi32_d0(i32 %val, i32 %num
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movl %edi, %eax
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
@@ -2892,7 +2890,7 @@ define i32 @bzhi32_d2_load(i32* %w, i32
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
@@ -2901,7 +2899,7 @@ define i32 @bzhi32_d2_load(i32* %w, i32
 ; X86-BMI1NOTBM-LABEL: bzhi32_d2_load:
 ; X86-BMI1NOTBM:       # %bb.0:
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
 ; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
 ; X86-BMI1NOTBM-NEXT:    retl
@@ -2909,15 +2907,15 @@ define i32 @bzhi32_d2_load(i32* %w, i32
 ; X86-BMI1BMI2-LABEL: bzhi32_d2_load:
 ; X86-BMI1BMI2:       # %bb.0:
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT:    bzhil %eax, (%ecx), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_d2_load:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movl (%rdi), %eax
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
@@ -3005,8 +3003,8 @@ define i64 @bzhi64_d0(i64 %val, i64 %num
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl %edx, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
@@ -3044,8 +3042,8 @@ define i64 @bzhi64_d0(i64 %val, i64 %num
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
@@ -3082,8 +3080,8 @@ define i64 @bzhi64_d0(i64 %val, i64 %num
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
@@ -3112,7 +3110,7 @@ define i64 @bzhi64_d0(i64 %val, i64 %num
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq %rdi, %rax
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
@@ -3283,8 +3281,8 @@ define i64 @bzhi64_d2_load(i64* %w, i64
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %edx
 ; X86-NOBMI-NEXT:    movl 4(%eax), %eax
-; X86-NOBMI-NEXT:    movb $64, %cl
-; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $64, %ecx
+; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl %edx, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
@@ -3323,8 +3321,8 @@ define i64 @bzhi64_d2_load(i64* %w, i64
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    movl (%eax), %edx
 ; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
@@ -3362,8 +3360,8 @@ define i64 @bzhi64_d2_load(i64* %w, i64
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movl (%eax), %edx
 ; X86-BMI1BMI2-NEXT:    movl 4(%eax), %esi
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $64, %ecx
+; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
@@ -3392,7 +3390,7 @@ define i64 @bzhi64_d2_load(i64* %w, i64
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq (%rdi), %rax
-; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
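
The extract-lowbits (bzhi) hunks are the same reversion through a different
lowering: the negated or subtracted bit count feeding shll/shrl/shrxl now
stays 32- or 64-bit. Representative shape of the i64 'd' tests (a sketch,
not the verbatim test body):

    define i64 @bzhi64_demo(i64 %val, i64 %numlowbits) {
      %numhighbits = sub i64 64, %numlowbits
      %highbitscleared = shl i64 %val, %numhighbits
      %masked = lshr i64 %highbitscleared, %numhighbits
      ret i64 %masked
    }

x86 variable shifts mask the amount to the low 5 or 6 bits of %cl, which is
what made the narrow byte arithmetic legal in the first place.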

Modified: llvm/trunk/test/CodeGen/X86/fshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fshl.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fshl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fshl.ll Fri Dec  7 07:47:52 2018
@@ -203,8 +203,8 @@ define i64 @var_shift_i64(i64 %x, i64 %y
 ; X86-FAST-NEXT:    movl %edi, %ebp
 ; X86-FAST-NEXT:    xorl %edi, %edi
 ; X86-FAST-NEXT:  .LBB3_2:
-; X86-FAST-NEXT:    movb $64, %cl
-; X86-FAST-NEXT:    subb %bl, %cl
+; X86-FAST-NEXT:    movl $64, %ecx
+; X86-FAST-NEXT:    subl %ebx, %ecx
 ; X86-FAST-NEXT:    movl %edx, %esi
 ; X86-FAST-NEXT:    shrl %cl, %esi
 ; X86-FAST-NEXT:    shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
@@ -242,58 +242,59 @@ define i64 @var_shift_i64(i64 %x, i64 %y
 ; X86-SLOW-NEXT:    pushl %edi
 ; X86-SLOW-NEXT:    pushl %esi
 ; X86-SLOW-NEXT:    subl $8, %esp
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-SLOW-NEXT:    andl $63, %ebx
-; X86-SLOW-NEXT:    movb $64, %dh
-; X86-SLOW-NEXT:    subb %bl, %dh
+; X86-SLOW-NEXT:    movl $64, %ecx
+; X86-SLOW-NEXT:    subl %ebx, %ecx
 ; X86-SLOW-NEXT:    movl %eax, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT:    movb %dh, %cl
 ; X86-SLOW-NEXT:    shrl %cl, %eax
-; X86-SLOW-NEXT:    movb %dh, %dl
-; X86-SLOW-NEXT:    andb $31, %dl
-; X86-SLOW-NEXT:    movl %edx, %ecx
-; X86-SLOW-NEXT:    negb %cl
-; X86-SLOW-NEXT:    movl %esi, %ebp
-; X86-SLOW-NEXT:    shll %cl, %ebp
-; X86-SLOW-NEXT:    testb %dl, %dl
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-SLOW-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT:    movb %cl, %ch
+; X86-SLOW-NEXT:    andb $31, %ch
+; X86-SLOW-NEXT:    movb %ch, %cl
+; X86-SLOW-NEXT:    negb %cl
+; X86-SLOW-NEXT:    movl %edi, %esi
+; X86-SLOW-NEXT:    shll %cl, %esi
+; X86-SLOW-NEXT:    testb %ch, %ch
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT:    je .LBB3_2
 ; X86-SLOW-NEXT:  # %bb.1:
-; X86-SLOW-NEXT:    orl %eax, %ebp
-; X86-SLOW-NEXT:    movl %ebp, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT:    orl %eax, %esi
+; X86-SLOW-NEXT:    movl %esi, (%esp) # 4-byte Spill
 ; X86-SLOW-NEXT:  .LBB3_2:
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT:    movl %ebp, %eax
+; X86-SLOW-NEXT:    movl %edx, %eax
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
 ; X86-SLOW-NEXT:    shll %cl, %eax
 ; X86-SLOW-NEXT:    movb %bl, %ch
 ; X86-SLOW-NEXT:    andb $31, %ch
 ; X86-SLOW-NEXT:    movb %ch, %cl
 ; X86-SLOW-NEXT:    negb %cl
-; X86-SLOW-NEXT:    shrl %cl, %edi
+; X86-SLOW-NEXT:    movl %edx, %esi
+; X86-SLOW-NEXT:    movl %ebp, %edx
+; X86-SLOW-NEXT:    shrl %cl, %ebp
 ; X86-SLOW-NEXT:    testb %ch, %ch
 ; X86-SLOW-NEXT:    je .LBB3_4
 ; X86-SLOW-NEXT:  # %bb.3:
-; X86-SLOW-NEXT:    orl %edi, %eax
-; X86-SLOW-NEXT:    movl %eax, %ebp
+; X86-SLOW-NEXT:    orl %ebp, %eax
+; X86-SLOW-NEXT:    movl %eax, %esi
 ; X86-SLOW-NEXT:  .LBB3_4:
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movl %eax, %edi
+; X86-SLOW-NEXT:    movl %edx, %eax
+; X86-SLOW-NEXT:    movl %edx, %ebp
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
-; X86-SLOW-NEXT:    shll %cl, %edi
+; X86-SLOW-NEXT:    shll %cl, %ebp
 ; X86-SLOW-NEXT:    testb $32, %bl
 ; X86-SLOW-NEXT:    je .LBB3_6
 ; X86-SLOW-NEXT:  # %bb.5:
-; X86-SLOW-NEXT:    movl %edi, %ebp
-; X86-SLOW-NEXT:    xorl %edi, %edi
+; X86-SLOW-NEXT:    movl %ebp, %esi
+; X86-SLOW-NEXT:    xorl %ebp, %ebp
 ; X86-SLOW-NEXT:  .LBB3_6:
-; X86-SLOW-NEXT:    movb %dh, %cl
-; X86-SLOW-NEXT:    shrl %cl, %esi
-; X86-SLOW-NEXT:    testb $32, %dh
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SLOW-NEXT:    shrl %cl, %edi
+; X86-SLOW-NEXT:    testb $32, %cl
 ; X86-SLOW-NEXT:    jne .LBB3_7
 ; X86-SLOW-NEXT:  # %bb.8:
 ; X86-SLOW-NEXT:    movl (%esp), %ecx # 4-byte Reload
@@ -301,17 +302,16 @@ define i64 @var_shift_i64(i64 %x, i64 %y
 ; X86-SLOW-NEXT:    jne .LBB3_10
 ; X86-SLOW-NEXT:    jmp .LBB3_11
 ; X86-SLOW-NEXT:  .LBB3_7:
-; X86-SLOW-NEXT:    movl %esi, %ecx
-; X86-SLOW-NEXT:    xorl %esi, %esi
+; X86-SLOW-NEXT:    movl %edi, %ecx
+; X86-SLOW-NEXT:    xorl %edi, %edi
 ; X86-SLOW-NEXT:    testl %ebx, %ebx
 ; X86-SLOW-NEXT:    je .LBB3_11
 ; X86-SLOW-NEXT:  .LBB3_10:
-; X86-SLOW-NEXT:    orl %esi, %ebp
-; X86-SLOW-NEXT:    orl %ecx, %edi
-; X86-SLOW-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT:    movl %edi, %eax
+; X86-SLOW-NEXT:    orl %edi, %esi
+; X86-SLOW-NEXT:    orl %ecx, %ebp
+; X86-SLOW-NEXT:    movl %esi, %edx
+; X86-SLOW-NEXT:    movl %ebp, %eax
 ; X86-SLOW-NEXT:  .LBB3_11:
-; X86-SLOW-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-SLOW-NEXT:    addl $8, %esp
 ; X86-SLOW-NEXT:    popl %esi
 ; X86-SLOW-NEXT:    popl %edi
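
In the X86-SLOW fshl path the interesting change is register-pressure
fallout rather than the instruction mix: as an i8, the 64 - amt value could
be parked in the spare byte register %dh; as an i32 it cannot, so it is now
spilled to the stack and reloaded into %ecx before the final shrl (the
{{[-0-9]+}}(%e{{[sb]}}p) reload above).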

Modified: llvm/trunk/test/CodeGen/X86/fshr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fshr.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fshr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fshr.ll Fri Dec  7 07:47:52 2018
@@ -187,17 +187,17 @@ define i64 @var_shift_i64(i64 %x, i64 %y
 ; X86-FAST-NEXT:    pushl %eax
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-FAST-NEXT:    movl %eax, (%esp) # 4-byte Spill
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-FAST-NEXT:    andl $63, %ebx
-; X86-FAST-NEXT:    movb $64, %cl
-; X86-FAST-NEXT:    subb %bl, %cl
+; X86-FAST-NEXT:    movl $64, %ecx
+; X86-FAST-NEXT:    subl %ebx, %ecx
 ; X86-FAST-NEXT:    movl %eax, %edi
 ; X86-FAST-NEXT:    shll %cl, %edi
 ; X86-FAST-NEXT:    shldl %cl, %eax, %esi
 ; X86-FAST-NEXT:    testb $32, %cl
+; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT:    je .LBB3_2
 ; X86-FAST-NEXT:  # %bb.1:
 ; X86-FAST-NEXT:    movl %edi, %esi
@@ -237,14 +237,12 @@ define i64 @var_shift_i64(i64 %x, i64 %y
 ; X86-SLOW-NEXT:    pushl %edi
 ; X86-SLOW-NEXT:    pushl %esi
 ; X86-SLOW-NEXT:    subl $8, %esp
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-SLOW-NEXT:    andl $63, %ebx
-; X86-SLOW-NEXT:    movb $64, %al
-; X86-SLOW-NEXT:    subb %bl, %al
+; X86-SLOW-NEXT:    movl $64, %eax
+; X86-SLOW-NEXT:    subl %ebx, %eax
 ; X86-SLOW-NEXT:    movl %edx, (%esp) # 4-byte Spill
 ; X86-SLOW-NEXT:    movl %eax, %ecx
 ; X86-SLOW-NEXT:    shll %cl, %edx
@@ -252,43 +250,45 @@ define i64 @var_shift_i64(i64 %x, i64 %y
 ; X86-SLOW-NEXT:    andb $31, %ch
 ; X86-SLOW-NEXT:    movb %ch, %cl
 ; X86-SLOW-NEXT:    negb %cl
-; X86-SLOW-NEXT:    movl %esi, %edi
-; X86-SLOW-NEXT:    shrl %cl, %edi
+; X86-SLOW-NEXT:    movl %edi, %ebp
+; X86-SLOW-NEXT:    shrl %cl, %ebp
 ; X86-SLOW-NEXT:    testb %ch, %ch
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-SLOW-NEXT:    je .LBB3_2
 ; X86-SLOW-NEXT:  # %bb.1:
-; X86-SLOW-NEXT:    orl %edi, %edx
+; X86-SLOW-NEXT:    orl %ebp, %edx
 ; X86-SLOW-NEXT:    movl %edx, (%esp) # 4-byte Spill
 ; X86-SLOW-NEXT:  .LBB3_2:
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-SLOW-NEXT:    movl %ecx, %edx
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
 ; X86-SLOW-NEXT:    shrl %cl, %edx
-; X86-SLOW-NEXT:    movb %bl, %ah
-; X86-SLOW-NEXT:    andb $31, %ah
-; X86-SLOW-NEXT:    movb %ah, %cl
+; X86-SLOW-NEXT:    movb %bl, %ch
+; X86-SLOW-NEXT:    andb $31, %ch
+; X86-SLOW-NEXT:    movb %ch, %cl
 ; X86-SLOW-NEXT:    negb %cl
-; X86-SLOW-NEXT:    movl %ebp, %edi
-; X86-SLOW-NEXT:    shll %cl, %edi
-; X86-SLOW-NEXT:    testb %ah, %ah
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT:    movl %esi, %ebp
+; X86-SLOW-NEXT:    shll %cl, %ebp
+; X86-SLOW-NEXT:    testb %ch, %ch
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-SLOW-NEXT:    je .LBB3_4
 ; X86-SLOW-NEXT:  # %bb.3:
-; X86-SLOW-NEXT:    orl %edx, %edi
-; X86-SLOW-NEXT:    movl %edi, %ebp
+; X86-SLOW-NEXT:    orl %edx, %ebp
+; X86-SLOW-NEXT:    movl %ebp, %esi
 ; X86-SLOW-NEXT:  .LBB3_4:
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-SLOW-NEXT:    movl %ebx, %ecx
-; X86-SLOW-NEXT:    shrl %cl, %edi
+; X86-SLOW-NEXT:    shrl %cl, %ebp
 ; X86-SLOW-NEXT:    testb $32, %bl
 ; X86-SLOW-NEXT:    je .LBB3_6
 ; X86-SLOW-NEXT:  # %bb.5:
-; X86-SLOW-NEXT:    movl %edi, %ebp
-; X86-SLOW-NEXT:    xorl %edi, %edi
+; X86-SLOW-NEXT:    movl %ebp, %esi
+; X86-SLOW-NEXT:    xorl %ebp, %ebp
 ; X86-SLOW-NEXT:  .LBB3_6:
 ; X86-SLOW-NEXT:    movl %eax, %ecx
-; X86-SLOW-NEXT:    shll %cl, %esi
+; X86-SLOW-NEXT:    shll %cl, %edi
 ; X86-SLOW-NEXT:    testb $32, %al
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT:    jne .LBB3_7
@@ -298,14 +298,14 @@ define i64 @var_shift_i64(i64 %x, i64 %y
 ; X86-SLOW-NEXT:    jne .LBB3_10
 ; X86-SLOW-NEXT:    jmp .LBB3_11
 ; X86-SLOW-NEXT:  .LBB3_7:
-; X86-SLOW-NEXT:    movl %esi, %eax
-; X86-SLOW-NEXT:    xorl %esi, %esi
+; X86-SLOW-NEXT:    movl %edi, %eax
+; X86-SLOW-NEXT:    xorl %edi, %edi
 ; X86-SLOW-NEXT:    testl %ebx, %ebx
 ; X86-SLOW-NEXT:    je .LBB3_11
 ; X86-SLOW-NEXT:  .LBB3_10:
-; X86-SLOW-NEXT:    orl %ebp, %esi
-; X86-SLOW-NEXT:    orl %edi, %eax
-; X86-SLOW-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT:    orl %esi, %edi
+; X86-SLOW-NEXT:    orl %ebp, %eax
+; X86-SLOW-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-SLOW-NEXT:    movl %eax, %edx
 ; X86-SLOW-NEXT:  .LBB3_11:
 ; X86-SLOW-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload

Modified: llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll (original)
+++ llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll Fri Dec  7 07:47:52 2018
@@ -231,25 +231,30 @@ define i64 @rotr_i64(i64 %x, i64 %z) nou
 ; X32-SSE2-NEXT:    pushl %ebx
 ; X32-SSE2-NEXT:    pushl %edi
 ; X32-SSE2-NEXT:    pushl %esi
-; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X32-SSE2-NEXT:    movl %ebx, %ecx
+; X32-SSE2-NEXT:    andl $63, %ecx
 ; X32-SSE2-NEXT:    movl %edx, %edi
 ; X32-SSE2-NEXT:    shrl %cl, %edi
-; X32-SSE2-NEXT:    movl %esi, %ebx
-; X32-SSE2-NEXT:    shrdl %cl, %edx, %ebx
-; X32-SSE2-NEXT:    xorl %ebp, %ebp
+; X32-SSE2-NEXT:    movl %esi, %ebp
+; X32-SSE2-NEXT:    shrdl %cl, %edx, %ebp
+; X32-SSE2-NEXT:    xorl %eax, %eax
 ; X32-SSE2-NEXT:    testb $32, %cl
-; X32-SSE2-NEXT:    cmovnel %edi, %ebx
-; X32-SSE2-NEXT:    cmovnel %ebp, %edi
-; X32-SSE2-NEXT:    negb %cl
+; X32-SSE2-NEXT:    cmovnel %edi, %ebp
+; X32-SSE2-NEXT:    cmovnel %eax, %edi
+; X32-SSE2-NEXT:    negl %ebx
+; X32-SSE2-NEXT:    andl $63, %ebx
 ; X32-SSE2-NEXT:    movl %esi, %eax
+; X32-SSE2-NEXT:    movl %ebx, %ecx
 ; X32-SSE2-NEXT:    shll %cl, %eax
 ; X32-SSE2-NEXT:    shldl %cl, %esi, %edx
-; X32-SSE2-NEXT:    testb $32, %cl
+; X32-SSE2-NEXT:    testb $32, %bl
 ; X32-SSE2-NEXT:    cmovnel %eax, %edx
-; X32-SSE2-NEXT:    cmovnel %ebp, %eax
-; X32-SSE2-NEXT:    orl %ebx, %eax
+; X32-SSE2-NEXT:    movl $0, %ecx
+; X32-SSE2-NEXT:    cmovnel %ecx, %eax
+; X32-SSE2-NEXT:    orl %ebp, %eax
 ; X32-SSE2-NEXT:    orl %edi, %edx
 ; X32-SSE2-NEXT:    popl %esi
 ; X32-SSE2-NEXT:    popl %edi
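
funnel-shift-rot.ll makes the reverted expansion easiest to read: the
negated rotate amount is computed as negl + andl $63 at i32 instead of a
single negb on %cl. The rotates reach the DAG via the funnel-shift
intrinsics with both value operands equal (a sketch matching the shape of
rotr_i64):

    declare i64 @llvm.fshr.i64(i64, i64, i64)

    define i64 @rotr_demo(i64 %x, i64 %z) {
      %r = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
      ret i64 %r
    }

The expansion is (x >> (z & 63)) | (x << (-z & 63)); it is the -z piece
whose width changed here.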

Modified: llvm/trunk/test/CodeGen/X86/funnel-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/funnel-shift.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/funnel-shift.ll (original)
+++ llvm/trunk/test/CodeGen/X86/funnel-shift.ll Fri Dec  7 07:47:52 2018
@@ -70,8 +70,8 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37
 ; X32-SSE2-NEXT:    cmovnel %ebp, %eax
 ; X32-SSE2-NEXT:    cmovnel %ecx, %ebp
 ; X32-SSE2-NEXT:    xorl %edx, %edx
-; X32-SSE2-NEXT:    movb $37, %cl
-; X32-SSE2-NEXT:    subb %bl, %cl
+; X32-SSE2-NEXT:    movl $37, %ecx
+; X32-SSE2-NEXT:    subl %ebx, %ecx
 ; X32-SSE2-NEXT:    shrdl %cl, %esi, %edi
 ; X32-SSE2-NEXT:    shrl %cl, %esi
 ; X32-SSE2-NEXT:    testb $32, %cl
@@ -248,8 +248,8 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37
 ; X32-SSE2-NEXT:    calll __umoddi3
 ; X32-SSE2-NEXT:    addl $16, %esp
 ; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    movb $37, %cl
-; X32-SSE2-NEXT:    subb %bl, %cl
+; X32-SSE2-NEXT:    movl $37, %ecx
+; X32-SSE2-NEXT:    subl %eax, %ecx
 ; X32-SSE2-NEXT:    movl %ebp, %eax
 ; X32-SSE2-NEXT:    shll %cl, %ebp
 ; X32-SSE2-NEXT:    shldl %cl, %eax, %edi

Modified: llvm/trunk/test/CodeGen/X86/pr32284.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32284.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32284.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32284.ll Fri Dec  7 07:47:52 2018
@@ -81,8 +81,9 @@ define void @foo() {
 ; 686-NEXT:    movzbl c, %eax
 ; 686-NEXT:    xorl %ecx, %ecx
 ; 686-NEXT:    testl %eax, %eax
-; 686-NEXT:    setne {{[0-9]+}}(%esp)
 ; 686-NEXT:    setne %cl
+; 686-NEXT:    testb %al, %al
+; 686-NEXT:    setne {{[0-9]+}}(%esp)
 ; 686-NEXT:    xorl %edx, %edx
 ; 686-NEXT:    cmpl %eax, %ecx
 ; 686-NEXT:    setle %dl

Modified: llvm/trunk/test/CodeGen/X86/pr37879.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr37879.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr37879.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr37879.ll Fri Dec  7 07:47:52 2018
@@ -6,6 +6,8 @@ define double @foo(i32** nocapture reado
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq (%rax), %rax
 ; CHECK-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm1
+; CHECK-NEXT:    ## kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    kmovd %eax, %k1
 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}

Modified: llvm/trunk/test/CodeGen/X86/rot16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rot16.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rot16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rot16.ll Fri Dec  7 07:47:52 2018
@@ -15,7 +15,7 @@ define i16 @foo(i16 %x, i16 %y, i16 %z)
 ; X64-NEXT:    movl %edx, %ecx
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    rolw %cl, %ax
+; X64-NEXT:    shldw %cl, %ax, %ax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = shl i16 %x, %z
@@ -62,7 +62,7 @@ define i16 @un(i16 %x, i16 %y, i16 %z) n
 ; X64-NEXT:    movl %edx, %ecx
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    rorw %cl, %ax
+; X64-NEXT:    shrdw %cl, %ax, %ax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 	%t0 = lshr i16 %x, %z
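
rot16.ll shows the one clear regression in this batch: rolw/rorw degrade to
shldw/shrdw with both operands in the same register, presumably because the
rotate idiom is no longer matched once the promoted i32 ops are not narrowed
back to i16 around the truncate. The tests build the rotate by hand from the
i16 shifts visible in the context lines above, roughly (a sketch; see the
file for the exact bodies):

    define i16 @rot16_demo(i16 %x, i16 %z) {
      %t0 = shl i16 %x, %z
      %t1 = sub i16 16, %z
      %t2 = lshr i16 %x, %t1
      %t3 = or i16 %t2, %t0
      ret i16 %t3
    }

shldw %cl, %ax, %ax computes the same left-rotate of %ax, just through the
double-shift path; this is the pre-r347917 output that the revert restores.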

Modified: llvm/trunk/test/CodeGen/X86/rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate.ll Fri Dec  7 07:47:52 2018
@@ -653,29 +653,29 @@ define i64 @truncated_rot(i64 %x, i32 %a
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    shll %cl, %eax
 ; X86-NEXT:    testb $32, %cl
-; X86-NEXT:    movl $0, %edi
+; X86-NEXT:    movl $0, %ebx
 ; X86-NEXT:    jne .LBB28_2
 ; X86-NEXT:  # %bb.1: # %entry
-; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:  .LBB28_2: # %entry
-; X86-NEXT:    movb $64, %dl
-; X86-NEXT:    subb %cl, %dl
-; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl $64, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    movl %edi, %eax
 ; X86-NEXT:    movl %edx, %ecx
 ; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    shrdl %cl, %ebx, %esi
+; X86-NEXT:    shrdl %cl, %edi, %esi
 ; X86-NEXT:    testb $32, %dl
 ; X86-NEXT:    jne .LBB28_4
 ; X86-NEXT:  # %bb.3: # %entry
 ; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:  .LBB28_4: # %entry
-; X86-NEXT:    orl %edi, %eax
+; X86-NEXT:    orl %ebx, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi

Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate4.ll Fri Dec  7 07:47:52 2018
@@ -65,9 +65,9 @@ define i64 @rotate_left_64(i64 %a, i64 %
 ; X86-NEXT:    .cfi_offset %esi, -16
 ; X86-NEXT:    .cfi_offset %edi, -12
 ; X86-NEXT:    .cfi_offset %ebx, -8
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    shll %cl, %eax
 ; X86-NEXT:    movl %edi, %edx
@@ -78,7 +78,7 @@ define i64 @rotate_left_64(i64 %a, i64 %
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:  .LBB2_2:
-; X86-NEXT:    negb %cl
+; X86-NEXT:    negl %ecx
 ; X86-NEXT:    movl %edi, %ebx
 ; X86-NEXT:    shrl %cl, %ebx
 ; X86-NEXT:    shrdl %cl, %edi, %esi
@@ -126,9 +126,9 @@ define i64 @rotate_right_64(i64 %a, i64
 ; X86-NEXT:    .cfi_offset %esi, -16
 ; X86-NEXT:    .cfi_offset %edi, -12
 ; X86-NEXT:    .cfi_offset %ebx, -8
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl %esi, %edx
 ; X86-NEXT:    shrl %cl, %edx
 ; X86-NEXT:    movl %edi, %eax
@@ -139,7 +139,7 @@ define i64 @rotate_right_64(i64 %a, i64
 ; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:  .LBB3_2:
-; X86-NEXT:    negb %cl
+; X86-NEXT:    negl %ecx
 ; X86-NEXT:    movl %edi, %ebx
 ; X86-NEXT:    shll %cl, %ebx
 ; X86-NEXT:    shldl %cl, %edi, %esi
@@ -242,7 +242,7 @@ define void @rotate_left_m64(i64 *%pa, i
 ; X86-NEXT:    .cfi_offset %edi, -16
 ; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    .cfi_offset %ebp, -8
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %edx
 ; X86-NEXT:    movl 4(%eax), %ebx
@@ -256,7 +256,7 @@ define void @rotate_left_m64(i64 *%pa, i
 ; X86-NEXT:    movl %esi, %edi
 ; X86-NEXT:    xorl %esi, %esi
 ; X86-NEXT:  .LBB6_2:
-; X86-NEXT:    negb %cl
+; X86-NEXT:    negl %ecx
 ; X86-NEXT:    movl %ebx, %ebp
 ; X86-NEXT:    shrl %cl, %ebp
 ; X86-NEXT:    shrdl %cl, %ebx, %edx
@@ -312,33 +312,33 @@ define void @rotate_right_m64(i64 *%pa,
 ; X86-NEXT:    .cfi_offset %edi, -16
 ; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    .cfi_offset %ebp, -8
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %ebx
-; X86-NEXT:    movl 4(%eax), %esi
-; X86-NEXT:    movl %esi, %edx
-; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    movl 4(%eax), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    movl %ebx, %edi
-; X86-NEXT:    shrdl %cl, %esi, %edi
+; X86-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NEXT:    testb $32, %cl
 ; X86-NEXT:    je .LBB7_2
 ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    movl %edx, %edi
-; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    xorl %esi, %esi
 ; X86-NEXT:  .LBB7_2:
-; X86-NEXT:    negb %cl
+; X86-NEXT:    negl %ecx
 ; X86-NEXT:    movl %ebx, %ebp
 ; X86-NEXT:    shll %cl, %ebp
-; X86-NEXT:    shldl %cl, %ebx, %esi
+; X86-NEXT:    shldl %cl, %ebx, %edx
 ; X86-NEXT:    testb $32, %cl
 ; X86-NEXT:    je .LBB7_4
 ; X86-NEXT:  # %bb.3:
-; X86-NEXT:    movl %ebp, %esi
+; X86-NEXT:    movl %ebp, %edx
 ; X86-NEXT:    xorl %ebp, %ebp
 ; X86-NEXT:  .LBB7_4:
-; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    orl %edx, %esi
 ; X86-NEXT:    orl %ebp, %edi
-; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
 ; X86-NEXT:    movl %edi, (%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 16

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll Fri Dec  7 07:47:52 2018
@@ -170,7 +170,7 @@ define i64 @lshift_cl(i64 %a, i64 %b, i6
 ; BDVER12-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
 ; BDVER12-NEXT:    movq %rsi, %rax # sched: [1:0.50]
 ; BDVER12-NEXT:    shlq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT:    negb %cl # sched: [1:0.50]
+; BDVER12-NEXT:    negl %ecx # sched: [1:0.50]
 ; BDVER12-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BDVER12-NEXT:    shrq %cl, %rax # sched: [1:0.50]
 ; BDVER12-NEXT:    orq %rdi, %rax # sched: [1:0.50]
@@ -181,7 +181,7 @@ define i64 @lshift_cl(i64 %a, i64 %b, i6
 ; BTVER2-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
 ; BTVER2-NEXT:    movq %rsi, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    shlq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT:    negb %cl # sched: [1:0.50]
+; BTVER2-NEXT:    negl %ecx # sched: [1:0.50]
 ; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BTVER2-NEXT:    shrq %cl, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    orq %rdi, %rax # sched: [1:0.50]
@@ -246,7 +246,7 @@ define i64 @rshift_cl(i64 %a, i64 %b, i6
 ; BDVER12-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
 ; BDVER12-NEXT:    movq %rsi, %rax # sched: [1:0.50]
 ; BDVER12-NEXT:    shrq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT:    negb %cl # sched: [1:0.50]
+; BDVER12-NEXT:    negl %ecx # sched: [1:0.50]
 ; BDVER12-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BDVER12-NEXT:    shlq %cl, %rax # sched: [1:0.50]
 ; BDVER12-NEXT:    orq %rdi, %rax # sched: [1:0.50]
@@ -257,7 +257,7 @@ define i64 @rshift_cl(i64 %a, i64 %b, i6
 ; BTVER2-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
 ; BTVER2-NEXT:    movq %rsi, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT:    negb %cl # sched: [1:0.50]
+; BTVER2-NEXT:    negl %ecx # sched: [1:0.50]
 ; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BTVER2-NEXT:    shlq %cl, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    orq %rdi, %rax # sched: [1:0.50]
@@ -321,7 +321,7 @@ define void @lshift_mem_cl(i64 %a, i64 %
 ; BDVER12-NEXT:    movq {{.*}}(%rip), %rax # sched: [5:0.50]
 ; BDVER12-NEXT:    movq %rsi, %rcx # sched: [1:0.50]
 ; BDVER12-NEXT:    shlq %cl, %rax # sched: [1:0.50]
-; BDVER12-NEXT:    negb %cl # sched: [1:0.50]
+; BDVER12-NEXT:    negl %ecx # sched: [1:0.50]
 ; BDVER12-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BDVER12-NEXT:    shrq %cl, %rdi # sched: [1:0.50]
 ; BDVER12-NEXT:    orq %rax, %rdi # sched: [1:0.50]
@@ -333,7 +333,7 @@ define void @lshift_mem_cl(i64 %a, i64 %
 ; BTVER2-NEXT:    movq {{.*}}(%rip), %rax # sched: [5:1.00]
 ; BTVER2-NEXT:    movq %rsi, %rcx # sched: [1:0.50]
 ; BTVER2-NEXT:    shlq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    negb %cl # sched: [1:0.50]
+; BTVER2-NEXT:    negl %ecx # sched: [1:0.50]
 ; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BTVER2-NEXT:    shrq %cl, %rdi # sched: [1:0.50]
 ; BTVER2-NEXT:    orq %rax, %rdi # sched: [1:0.50]
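
The scheduler-backtracking.ll hunks below show the upside of keeping the
math wide: the old incl + addb %sil,%sil + orb $1 sequence computed
(x + 1) * 2 | 1 across mixed widths, and once everything stays i32 it folds
into a single leal 3(%rsi,%rsi), since 2*(x+1) is even and or-ing in 1 is
just +1, i.e. 2*x + 3. As a hypothetical IR-level reduction (not the test
body):

    define i32 @lea_demo(i32 %x) {
      %a = add i32 %x, 1
      %b = shl i32 %a, 1       ; 2*(%x + 1), always even
      %c = or i32 %b, 1        ; low bit clear, so this is %b + 1 = 2*%x + 3
      ret i32 %c
    }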

Modified: llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll Fri Dec  7 07:47:52 2018
@@ -16,45 +16,43 @@ define i256 @test1(i256 %a) nounwind {
 ; ILP-NEXT:    pushq %rbx
 ; ILP-NEXT:    movq %rdi, %rax
 ; ILP-NEXT:    xorl %r8d, %r8d
-; ILP-NEXT:    incl %esi
-; ILP-NEXT:    addb %sil, %sil
-; ILP-NEXT:    orb $1, %sil
+; ILP-NEXT:    leal 3(%rsi,%rsi), %r11d
 ; ILP-NEXT:    movl $1, %r9d
 ; ILP-NEXT:    xorl %r14d, %r14d
-; ILP-NEXT:    movl %esi, %ecx
-; ILP-NEXT:    shldq %cl, %r9, %r14
-; ILP-NEXT:    movl $1, %edx
-; ILP-NEXT:    shlq %cl, %rdx
-; ILP-NEXT:    movl %esi, %r11d
-; ILP-NEXT:    addb $-128, %r11b
-; ILP-NEXT:    movb $-128, %r10b
-; ILP-NEXT:    xorl %ebx, %ebx
 ; ILP-NEXT:    movl %r11d, %ecx
-; ILP-NEXT:    shldq %cl, %r9, %rbx
-; ILP-NEXT:    testb $64, %sil
-; ILP-NEXT:    cmovneq %rdx, %r14
-; ILP-NEXT:    cmovneq %r8, %rdx
+; ILP-NEXT:    shldq %cl, %r9, %r14
 ; ILP-NEXT:    movl $1, %edi
 ; ILP-NEXT:    shlq %cl, %rdi
-; ILP-NEXT:    subb %sil, %r10b
+; ILP-NEXT:    movb $-128, %r10b
+; ILP-NEXT:    subb %r11b, %r10b
+; ILP-NEXT:    movl %r11d, %edx
+; ILP-NEXT:    addb $-128, %dl
+; ILP-NEXT:    xorl %esi, %esi
+; ILP-NEXT:    movl %edx, %ecx
+; ILP-NEXT:    shldq %cl, %r9, %rsi
+; ILP-NEXT:    movl $1, %ebx
+; ILP-NEXT:    shlq %cl, %rbx
 ; ILP-NEXT:    movl %r10d, %ecx
 ; ILP-NEXT:    shrdq %cl, %r8, %r9
-; ILP-NEXT:    testb $64, %r10b
-; ILP-NEXT:    cmovneq %r8, %r9
 ; ILP-NEXT:    testb $64, %r11b
-; ILP-NEXT:    cmovneq %rdi, %rbx
+; ILP-NEXT:    cmovneq %rdi, %r14
 ; ILP-NEXT:    cmovneq %r8, %rdi
-; ILP-NEXT:    testb %sil, %sil
+; ILP-NEXT:    testb $64, %r10b
+; ILP-NEXT:    cmovneq %r8, %r9
+; ILP-NEXT:    testb $64, %dl
+; ILP-NEXT:    cmovneq %rbx, %rsi
+; ILP-NEXT:    cmovneq %r8, %rbx
+; ILP-NEXT:    testb %r11b, %r11b
 ; ILP-NEXT:    cmovsq %r8, %r14
-; ILP-NEXT:    cmovsq %r8, %rdx
+; ILP-NEXT:    cmovsq %r8, %rdi
 ; ILP-NEXT:    movq %r14, 8(%rax)
-; ILP-NEXT:    movq %rdx, (%rax)
-; ILP-NEXT:    cmovnsq %r8, %rbx
+; ILP-NEXT:    movq %rdi, (%rax)
+; ILP-NEXT:    cmovnsq %r8, %rsi
+; ILP-NEXT:    cmoveq %r8, %rsi
+; ILP-NEXT:    movq %rsi, 24(%rax)
+; ILP-NEXT:    cmovnsq %r9, %rbx
 ; ILP-NEXT:    cmoveq %r8, %rbx
-; ILP-NEXT:    movq %rbx, 24(%rax)
-; ILP-NEXT:    cmovnsq %r9, %rdi
-; ILP-NEXT:    cmoveq %r8, %rdi
-; ILP-NEXT:    movq %rdi, 16(%rax)
+; ILP-NEXT:    movq %rbx, 16(%rax)
 ; ILP-NEXT:    popq %rbx
 ; ILP-NEXT:    popq %r14
 ; ILP-NEXT:    retq
@@ -62,132 +60,126 @@ define i256 @test1(i256 %a) nounwind {
 ; HYBRID-LABEL: test1:
 ; HYBRID:       # %bb.0:
 ; HYBRID-NEXT:    movq %rdi, %rax
-; HYBRID-NEXT:    incl %esi
-; HYBRID-NEXT:    addb %sil, %sil
-; HYBRID-NEXT:    orb $1, %sil
+; HYBRID-NEXT:    leal 3(%rsi,%rsi), %r10d
 ; HYBRID-NEXT:    movb $-128, %cl
-; HYBRID-NEXT:    subb %sil, %cl
+; HYBRID-NEXT:    subb %r10b, %cl
 ; HYBRID-NEXT:    xorl %r8d, %r8d
-; HYBRID-NEXT:    movl $1, %r11d
+; HYBRID-NEXT:    movl $1, %esi
 ; HYBRID-NEXT:    movl $1, %r9d
 ; HYBRID-NEXT:    shrdq %cl, %r8, %r9
 ; HYBRID-NEXT:    testb $64, %cl
 ; HYBRID-NEXT:    cmovneq %r8, %r9
-; HYBRID-NEXT:    xorl %r10d, %r10d
-; HYBRID-NEXT:    movl %esi, %ecx
-; HYBRID-NEXT:    shldq %cl, %r11, %r10
+; HYBRID-NEXT:    xorl %r11d, %r11d
+; HYBRID-NEXT:    movl %r10d, %ecx
+; HYBRID-NEXT:    shldq %cl, %rsi, %r11
 ; HYBRID-NEXT:    addb $-128, %cl
-; HYBRID-NEXT:    xorl %edi, %edi
-; HYBRID-NEXT:    shldq %cl, %r11, %rdi
-; HYBRID-NEXT:    movl $1, %edx
-; HYBRID-NEXT:    shlq %cl, %rdx
+; HYBRID-NEXT:    xorl %edx, %edx
+; HYBRID-NEXT:    shldq %cl, %rsi, %rdx
+; HYBRID-NEXT:    movl $1, %edi
+; HYBRID-NEXT:    shlq %cl, %rdi
 ; HYBRID-NEXT:    testb $64, %cl
-; HYBRID-NEXT:    cmovneq %rdx, %rdi
-; HYBRID-NEXT:    cmovneq %r8, %rdx
-; HYBRID-NEXT:    movl %esi, %ecx
-; HYBRID-NEXT:    shlq %cl, %r11
-; HYBRID-NEXT:    testb $64, %sil
-; HYBRID-NEXT:    cmovneq %r11, %r10
-; HYBRID-NEXT:    cmovneq %r8, %r11
-; HYBRID-NEXT:    testb %sil, %sil
-; HYBRID-NEXT:    cmovsq %r8, %r10
-; HYBRID-NEXT:    movq %r10, 8(%rax)
+; HYBRID-NEXT:    cmovneq %rdi, %rdx
+; HYBRID-NEXT:    cmovneq %r8, %rdi
+; HYBRID-NEXT:    movl %r10d, %ecx
+; HYBRID-NEXT:    shlq %cl, %rsi
+; HYBRID-NEXT:    testb $64, %r10b
+; HYBRID-NEXT:    cmovneq %rsi, %r11
+; HYBRID-NEXT:    cmovneq %r8, %rsi
+; HYBRID-NEXT:    testb %r10b, %r10b
 ; HYBRID-NEXT:    cmovsq %r8, %r11
-; HYBRID-NEXT:    movq %r11, (%rax)
-; HYBRID-NEXT:    cmovnsq %r8, %rdi
-; HYBRID-NEXT:    cmoveq %r8, %rdi
-; HYBRID-NEXT:    movq %rdi, 24(%rax)
-; HYBRID-NEXT:    cmovnsq %r9, %rdx
+; HYBRID-NEXT:    movq %r11, 8(%rax)
+; HYBRID-NEXT:    cmovsq %r8, %rsi
+; HYBRID-NEXT:    movq %rsi, (%rax)
+; HYBRID-NEXT:    cmovnsq %r8, %rdx
 ; HYBRID-NEXT:    cmoveq %r8, %rdx
-; HYBRID-NEXT:    movq %rdx, 16(%rax)
+; HYBRID-NEXT:    movq %rdx, 24(%rax)
+; HYBRID-NEXT:    cmovnsq %r9, %rdi
+; HYBRID-NEXT:    cmoveq %r8, %rdi
+; HYBRID-NEXT:    movq %rdi, 16(%rax)
 ; HYBRID-NEXT:    retq
 ;
 ; BURR-LABEL: test1:
 ; BURR:       # %bb.0:
 ; BURR-NEXT:    movq %rdi, %rax
-; BURR-NEXT:    incl %esi
-; BURR-NEXT:    addb %sil, %sil
-; BURR-NEXT:    orb $1, %sil
+; BURR-NEXT:    leal 3(%rsi,%rsi), %r10d
 ; BURR-NEXT:    movb $-128, %cl
-; BURR-NEXT:    subb %sil, %cl
+; BURR-NEXT:    subb %r10b, %cl
 ; BURR-NEXT:    xorl %r8d, %r8d
-; BURR-NEXT:    movl $1, %r11d
+; BURR-NEXT:    movl $1, %esi
 ; BURR-NEXT:    movl $1, %r9d
 ; BURR-NEXT:    shrdq %cl, %r8, %r9
 ; BURR-NEXT:    testb $64, %cl
 ; BURR-NEXT:    cmovneq %r8, %r9
-; BURR-NEXT:    xorl %r10d, %r10d
-; BURR-NEXT:    movl %esi, %ecx
-; BURR-NEXT:    shldq %cl, %r11, %r10
+; BURR-NEXT:    xorl %r11d, %r11d
+; BURR-NEXT:    movl %r10d, %ecx
+; BURR-NEXT:    shldq %cl, %rsi, %r11
 ; BURR-NEXT:    addb $-128, %cl
-; BURR-NEXT:    xorl %edi, %edi
-; BURR-NEXT:    shldq %cl, %r11, %rdi
-; BURR-NEXT:    movl $1, %edx
-; BURR-NEXT:    shlq %cl, %rdx
+; BURR-NEXT:    xorl %edx, %edx
+; BURR-NEXT:    shldq %cl, %rsi, %rdx
+; BURR-NEXT:    movl $1, %edi
+; BURR-NEXT:    shlq %cl, %rdi
 ; BURR-NEXT:    testb $64, %cl
-; BURR-NEXT:    cmovneq %rdx, %rdi
-; BURR-NEXT:    cmovneq %r8, %rdx
-; BURR-NEXT:    movl %esi, %ecx
-; BURR-NEXT:    shlq %cl, %r11
-; BURR-NEXT:    testb $64, %sil
-; BURR-NEXT:    cmovneq %r11, %r10
-; BURR-NEXT:    cmovneq %r8, %r11
-; BURR-NEXT:    testb %sil, %sil
-; BURR-NEXT:    cmovsq %r8, %r10
-; BURR-NEXT:    movq %r10, 8(%rax)
+; BURR-NEXT:    cmovneq %rdi, %rdx
+; BURR-NEXT:    cmovneq %r8, %rdi
+; BURR-NEXT:    movl %r10d, %ecx
+; BURR-NEXT:    shlq %cl, %rsi
+; BURR-NEXT:    testb $64, %r10b
+; BURR-NEXT:    cmovneq %rsi, %r11
+; BURR-NEXT:    cmovneq %r8, %rsi
+; BURR-NEXT:    testb %r10b, %r10b
 ; BURR-NEXT:    cmovsq %r8, %r11
-; BURR-NEXT:    movq %r11, (%rax)
-; BURR-NEXT:    cmovnsq %r8, %rdi
-; BURR-NEXT:    cmoveq %r8, %rdi
-; BURR-NEXT:    movq %rdi, 24(%rax)
-; BURR-NEXT:    cmovnsq %r9, %rdx
+; BURR-NEXT:    movq %r11, 8(%rax)
+; BURR-NEXT:    cmovsq %r8, %rsi
+; BURR-NEXT:    movq %rsi, (%rax)
+; BURR-NEXT:    cmovnsq %r8, %rdx
 ; BURR-NEXT:    cmoveq %r8, %rdx
-; BURR-NEXT:    movq %rdx, 16(%rax)
+; BURR-NEXT:    movq %rdx, 24(%rax)
+; BURR-NEXT:    cmovnsq %r9, %rdi
+; BURR-NEXT:    cmoveq %r8, %rdi
+; BURR-NEXT:    movq %rdi, 16(%rax)
 ; BURR-NEXT:    retq
 ;
 ; SRC-LABEL: test1:
 ; SRC:       # %bb.0:
 ; SRC-NEXT:    pushq %rbx
 ; SRC-NEXT:    movq %rdi, %rax
-; SRC-NEXT:    incl %esi
-; SRC-NEXT:    addb %sil, %sil
-; SRC-NEXT:    orb $1, %sil
+; SRC-NEXT:    leal 3(%rsi,%rsi), %r9d
 ; SRC-NEXT:    movb $-128, %cl
-; SRC-NEXT:    subb %sil, %cl
+; SRC-NEXT:    subb %r9b, %cl
 ; SRC-NEXT:    xorl %r8d, %r8d
 ; SRC-NEXT:    movl $1, %edi
 ; SRC-NEXT:    movl $1, %r10d
 ; SRC-NEXT:    shrdq %cl, %r8, %r10
 ; SRC-NEXT:    testb $64, %cl
 ; SRC-NEXT:    cmovneq %r8, %r10
-; SRC-NEXT:    movl %esi, %r9d
-; SRC-NEXT:    addb $-128, %r9b
+; SRC-NEXT:    movl %r9d, %r11d
+; SRC-NEXT:    addb $-128, %r11b
+; SRC-NEXT:    xorl %esi, %esi
+; SRC-NEXT:    movl %r11d, %ecx
+; SRC-NEXT:    shldq %cl, %rdi, %rsi
 ; SRC-NEXT:    xorl %edx, %edx
 ; SRC-NEXT:    movl %r9d, %ecx
 ; SRC-NEXT:    shldq %cl, %rdi, %rdx
-; SRC-NEXT:    xorl %r11d, %r11d
-; SRC-NEXT:    movl %esi, %ecx
-; SRC-NEXT:    shldq %cl, %rdi, %r11
 ; SRC-NEXT:    movl $1, %ebx
 ; SRC-NEXT:    shlq %cl, %rbx
-; SRC-NEXT:    testb $64, %sil
-; SRC-NEXT:    cmovneq %rbx, %r11
+; SRC-NEXT:    testb $64, %r9b
+; SRC-NEXT:    cmovneq %rbx, %rdx
 ; SRC-NEXT:    cmovneq %r8, %rbx
-; SRC-NEXT:    movl %r9d, %ecx
+; SRC-NEXT:    movl %r11d, %ecx
 ; SRC-NEXT:    shlq %cl, %rdi
-; SRC-NEXT:    testb $64, %r9b
-; SRC-NEXT:    cmovneq %rdi, %rdx
+; SRC-NEXT:    testb $64, %r11b
+; SRC-NEXT:    cmovneq %rdi, %rsi
 ; SRC-NEXT:    cmovneq %r8, %rdi
-; SRC-NEXT:    testb %sil, %sil
+; SRC-NEXT:    testb %r9b, %r9b
 ; SRC-NEXT:    cmovnsq %r10, %rdi
 ; SRC-NEXT:    cmoveq %r8, %rdi
-; SRC-NEXT:    cmovnsq %r8, %rdx
-; SRC-NEXT:    cmoveq %r8, %rdx
-; SRC-NEXT:    cmovsq %r8, %r11
+; SRC-NEXT:    cmovnsq %r8, %rsi
+; SRC-NEXT:    cmoveq %r8, %rsi
+; SRC-NEXT:    cmovsq %r8, %rdx
 ; SRC-NEXT:    cmovsq %r8, %rbx
-; SRC-NEXT:    movq %r11, 8(%rax)
+; SRC-NEXT:    movq %rdx, 8(%rax)
 ; SRC-NEXT:    movq %rbx, (%rax)
-; SRC-NEXT:    movq %rdx, 24(%rax)
+; SRC-NEXT:    movq %rsi, 24(%rax)
 ; SRC-NEXT:    movq %rdi, 16(%rax)
 ; SRC-NEXT:    popq %rbx
 ; SRC-NEXT:    retq
@@ -197,48 +189,46 @@ define i256 @test1(i256 %a) nounwind {
 ; LIN-NEXT:    movq %rdi, %rax
 ; LIN-NEXT:    xorl %r9d, %r9d
 ; LIN-NEXT:    movl $1, %r8d
-; LIN-NEXT:    incl %esi
-; LIN-NEXT:    addb %sil, %sil
-; LIN-NEXT:    orb $1, %sil
-; LIN-NEXT:    movl $1, %edx
-; LIN-NEXT:    movl %esi, %ecx
-; LIN-NEXT:    shlq %cl, %rdx
-; LIN-NEXT:    testb $64, %sil
-; LIN-NEXT:    movq %rdx, %rcx
+; LIN-NEXT:    leal 3(%rsi,%rsi), %edx
+; LIN-NEXT:    movl $1, %esi
+; LIN-NEXT:    movl %edx, %ecx
+; LIN-NEXT:    shlq %cl, %rsi
+; LIN-NEXT:    testb $64, %dl
+; LIN-NEXT:    movq %rsi, %rcx
 ; LIN-NEXT:    cmovneq %r9, %rcx
-; LIN-NEXT:    testb %sil, %sil
+; LIN-NEXT:    testb %dl, %dl
 ; LIN-NEXT:    cmovsq %r9, %rcx
 ; LIN-NEXT:    movq %rcx, (%rdi)
 ; LIN-NEXT:    xorl %edi, %edi
-; LIN-NEXT:    movl %esi, %ecx
+; LIN-NEXT:    movl %edx, %ecx
 ; LIN-NEXT:    shldq %cl, %r8, %rdi
-; LIN-NEXT:    cmovneq %rdx, %rdi
+; LIN-NEXT:    cmovneq %rsi, %rdi
 ; LIN-NEXT:    cmovsq %r9, %rdi
 ; LIN-NEXT:    movq %rdi, 8(%rax)
-; LIN-NEXT:    movl %esi, %edx
-; LIN-NEXT:    addb $-128, %dl
+; LIN-NEXT:    movl %edx, %esi
+; LIN-NEXT:    addb $-128, %sil
 ; LIN-NEXT:    movl $1, %r10d
-; LIN-NEXT:    movl %edx, %ecx
+; LIN-NEXT:    movl %esi, %ecx
 ; LIN-NEXT:    shlq %cl, %r10
-; LIN-NEXT:    testb $64, %dl
+; LIN-NEXT:    testb $64, %sil
 ; LIN-NEXT:    movq %r10, %rdi
 ; LIN-NEXT:    cmovneq %r9, %rdi
 ; LIN-NEXT:    movb $-128, %cl
-; LIN-NEXT:    subb %sil, %cl
-; LIN-NEXT:    movl $1, %esi
-; LIN-NEXT:    shrdq %cl, %r9, %rsi
+; LIN-NEXT:    subb %dl, %cl
+; LIN-NEXT:    movl $1, %edx
+; LIN-NEXT:    shrdq %cl, %r9, %rdx
 ; LIN-NEXT:    testb $64, %cl
-; LIN-NEXT:    cmovneq %r9, %rsi
-; LIN-NEXT:    cmovsq %rdi, %rsi
-; LIN-NEXT:    cmoveq %r9, %rsi
-; LIN-NEXT:    movq %rsi, 16(%rax)
-; LIN-NEXT:    xorl %esi, %esi
-; LIN-NEXT:    movl %edx, %ecx
-; LIN-NEXT:    shldq %cl, %r8, %rsi
-; LIN-NEXT:    cmovneq %r10, %rsi
-; LIN-NEXT:    cmovnsq %r9, %rsi
-; LIN-NEXT:    cmoveq %r9, %rsi
-; LIN-NEXT:    movq %rsi, 24(%rax)
+; LIN-NEXT:    cmovneq %r9, %rdx
+; LIN-NEXT:    cmovsq %rdi, %rdx
+; LIN-NEXT:    cmoveq %r9, %rdx
+; LIN-NEXT:    movq %rdx, 16(%rax)
+; LIN-NEXT:    xorl %edx, %edx
+; LIN-NEXT:    movl %esi, %ecx
+; LIN-NEXT:    shldq %cl, %r8, %rdx
+; LIN-NEXT:    cmovneq %r10, %rdx
+; LIN-NEXT:    cmovnsq %r9, %rdx
+; LIN-NEXT:    cmoveq %r9, %rdx
+; LIN-NEXT:    movq %rdx, 24(%rax)
 ; LIN-NEXT:    retq
   %b = add i256 %a, 1
   %m = shl i256 %b, 1

Modified: llvm/trunk/test/CodeGen/X86/test-shrink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/test-shrink.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/test-shrink.ll (original)
+++ llvm/trunk/test/CodeGen/X86/test-shrink.ll Fri Dec  7 07:47:52 2018
@@ -590,6 +590,7 @@ define void @and16_trunc_8_sign(i16 %x)
 ; CHECK-WIN32-64-LABEL: and16_trunc_8_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
+; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB13_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
@@ -600,7 +601,8 @@ define void @and16_trunc_8_sign(i16 %x)
 ;
 ; CHECK-X86-LABEL: and16_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    testb $-128, {{[0-9]+}}(%esp)
+; CHECK-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testb $-128, %al
 ; CHECK-X86-NEXT:    jg .LBB13_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -731,8 +733,8 @@ define void @and32_trunc_16_sign(i32 %x)
 ;
 ; CHECK-X86-LABEL: and32_trunc_16_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    andl $32768, %eax # imm = 0x8000
+; CHECK-X86-NEXT:    movl $32768, %eax # imm = 0x8000
+; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; CHECK-X86-NEXT:    testw %ax, %ax
 ; CHECK-X86-NEXT:    jg .LBB16_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
@@ -776,8 +778,7 @@ define void @and32_trunc_16_sign_minsize
 ;
 ; CHECK-X86-LABEL: and32_trunc_16_sign_minsize:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testw $-32768, %ax # imm = 0x8000
+; CHECK-X86-NEXT:    testw $-32768, {{[0-9]+}}(%esp) # imm = 0x8000
 ; CHECK-X86-NEXT:    jg .LBB17_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll Fri Dec  7 07:47:52 2018
@@ -1340,39 +1340,49 @@ define <16 x i8> @trunc_ext_sub_v16i16_v
 define <4 x i32> @trunc_sub_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v4i64_v4i32:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movl $1, %eax
+; SSE-NEXT:    movq %rax, %xmm2
+; SSE-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    psubq %xmm2, %xmm0
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v4i64_v4i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    movl $1, %eax
+; AVX1-NEXT:    vmovq %rax, %xmm1
+; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: trunc_sub_const_v4i64_v4i32:
 ; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-SLOW-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-SLOW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-SLOW-NEXT:    vzeroupper
 ; AVX2-SLOW-NEXT:    retq
 ;
 ; AVX2-FAST-LABEL: trunc_sub_const_v4i64_v4i32:
 ; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
 ; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm1, %ymm0
-; AVX2-FAST-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-FAST-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v4i64_v4i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
-; AVX512-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
@@ -1383,38 +1393,53 @@ define <4 x i32> @trunc_sub_const_v4i64_
 define <8 x i16> @trunc_sub_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v8i64_v8i16:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movl $1, %eax
+; SSE-NEXT:    movq %rax, %xmm4
+; SSE-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    psubq %xmm4, %xmm0
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm2
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
+; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm0[0,2,2,3,4,5,6,7]
-; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,1,0,2,4,5,6,7]
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
-; SSE-NEXT:    psubw {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE-NEXT:    movapd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v8i64_v8i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    movl $1, %eax
+; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
+; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
+; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
+; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: trunc_sub_const_v8i64_v8i16:
 ; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
@@ -1422,26 +1447,28 @@ define <8 x i16> @trunc_sub_const_v8i64_
 ; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-SLOW-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-SLOW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-SLOW-NEXT:    vzeroupper
 ; AVX2-SLOW-NEXT:    retq
 ;
 ; AVX2-FAST-LABEL: trunc_sub_const_v8i64_v8i16:
 ; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
 ; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm2, %ymm0
 ; AVX2-FAST-NEXT:    vpermd %ymm1, %ymm2, %ymm1
 ; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-FAST-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-FAST-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v8i64_v8i16:
 ; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsubq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
-; AVX512-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
@@ -1452,38 +1479,41 @@ define <8 x i16> @trunc_sub_const_v8i64_
 define <8 x i16> @trunc_sub_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v8i32_v8i16:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm0
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm1
 ; SSE-NEXT:    pslld $16, %xmm1
 ; SSE-NEXT:    psrad $16, %xmm1
 ; SSE-NEXT:    pslld $16, %xmm0
 ; SSE-NEXT:    psrad $16, %xmm0
 ; SSE-NEXT:    packssdw %xmm1, %xmm0
-; SSE-NEXT:    psubw {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v8i32_v8i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: trunc_sub_const_v8i32_v8i16:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsubd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v8i32_v8i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vpsubd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
-; AVX512-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1494,6 +1524,17 @@ define <8 x i16> @trunc_sub_const_v8i32_
 define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v16i64_v16i8:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movl $1, %eax
+; SSE-NEXT:    movq %rax, %xmm8
+; SSE-NEXT:    pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    psubq %xmm8, %xmm0
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm2
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm3
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm4
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm5
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm6
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm7
 ; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
 ; SSE-NEXT:    pand %xmm8, %xmm7
 ; SSE-NEXT:    pand %xmm8, %xmm6
@@ -1510,38 +1551,51 @@ define <16 x i8> @trunc_sub_const_v16i64
 ; SSE-NEXT:    packuswb %xmm1, %xmm0
 ; SSE-NEXT:    packuswb %xmm2, %xmm0
 ; SSE-NEXT:    packuswb %xmm4, %xmm0
-; SSE-NEXT:    psubb {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v16i64_v16i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321]
-; AVX1-NEXT:    # xmm5 = mem[0,0]
-; AVX1-NEXT:    vandpd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandpd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpackusdw %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vandpd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandpd %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpackusdw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    movl $1, %eax
+; AVX1-NEXT:    vmovq %rax, %xmm4
+; AVX1-NEXT:    vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
+; AVX1-NEXT:    vpsubq %xmm4, %xmm0, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm2, %xmm6
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm3, %xmm7
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [1.2598673968951787E-321,1.2598673968951787E-321]
+; AVX1-NEXT:    # xmm4 = mem[0,0]
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm4, %xmm7, %xmm7
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm7, %xmm3
+; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm4, %xmm6, %xmm6
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm6, %xmm2
 ; AVX1-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vandpd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandpd %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vandpd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandpd %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm8, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: trunc_sub_const_v16i64_v16i8:
 ; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
@@ -1561,12 +1615,15 @@ define <16 x i8> @trunc_sub_const_v16i64
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-SLOW-NEXT:    vpand %xmm4, %xmm0, %xmm0
 ; AVX2-SLOW-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-SLOW-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-SLOW-NEXT:    vzeroupper
 ; AVX2-SLOW-NEXT:    retq
 ;
 ; AVX2-FAST-LABEL: trunc_sub_const_v16i64_v16i8:
 ; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
 ; AVX2-FAST-NEXT:    vpermd %ymm2, %ymm4, %ymm2
 ; AVX2-FAST-NEXT:    vpermd %ymm3, %ymm4, %ymm3
@@ -1583,17 +1640,17 @@ define <16 x i8> @trunc_sub_const_v16i64
 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-FAST-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX2-FAST-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-FAST-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v16i64_v16i8:
 ; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsubq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512-NEXT:    vpsubq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
 ; AVX512-NEXT:    vpmovqd %zmm1, %ymm1
 ; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
@@ -1604,6 +1661,10 @@ define <16 x i8> @trunc_sub_const_v16i64
 define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v16i32_v16i8:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm0
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm1
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm2
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm3
 ; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
 ; SSE-NEXT:    pand %xmm4, %xmm3
 ; SSE-NEXT:    pand %xmm4, %xmm2
@@ -1612,27 +1673,31 @@ define <16 x i8> @trunc_sub_const_v16i32
 ; SSE-NEXT:    pand %xmm4, %xmm0
 ; SSE-NEXT:    packuswb %xmm1, %xmm0
 ; SSE-NEXT:    packuswb %xmm2, %xmm0
-; SSE-NEXT:    psubb {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v16i32_v16i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
+; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: trunc_sub_const_v16i32_v16i8:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsubd {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpsubd {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
@@ -1642,14 +1707,13 @@ define <16 x i8> @trunc_sub_const_v16i32
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v16i32_v16i8:
 ; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsubd {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1660,52 +1724,56 @@ define <16 x i8> @trunc_sub_const_v16i32
 define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v16i16_v16i8:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    psubw {{.*}}(%rip), %xmm0
+; SSE-NEXT:    psubw {{.*}}(%rip), %xmm1
 ; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
 ; SSE-NEXT:    pand %xmm2, %xmm1
 ; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    packuswb %xmm1, %xmm0
-; SSE-NEXT:    psubb {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpackuswb %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
   %1 = sub <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll Fri Dec  7 07:47:52 2018
@@ -1340,39 +1340,49 @@ define <16 x i8> @trunc_ext_sub_v16i16_v
 define <4 x i32> @trunc_sub_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v4i64_v4i32:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movl $1, %eax
+; SSE-NEXT:    movq %rax, %xmm2
+; SSE-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    psubq %xmm2, %xmm0
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v4i64_v4i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    movl $1, %eax
+; AVX1-NEXT:    vmovq %rax, %xmm1
+; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: trunc_sub_const_v4i64_v4i32:
 ; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-SLOW-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-SLOW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-SLOW-NEXT:    vzeroupper
 ; AVX2-SLOW-NEXT:    retq
 ;
 ; AVX2-FAST-LABEL: trunc_sub_const_v4i64_v4i32:
 ; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
 ; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm1, %ymm0
-; AVX2-FAST-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-FAST-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v4i64_v4i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
-; AVX512-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
@@ -1383,38 +1393,53 @@ define <4 x i32> @trunc_sub_const_v4i64_
 define <8 x i16> @trunc_sub_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v8i64_v8i16:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movl $1, %eax
+; SSE-NEXT:    movq %rax, %xmm4
+; SSE-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    psubq %xmm4, %xmm0
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm2
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
+; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm0[0,2,2,3,4,5,6,7]
-; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,1,0,2,4,5,6,7]
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
-; SSE-NEXT:    psubw {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE-NEXT:    movapd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v8i64_v8i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    movl $1, %eax
+; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
+; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
+; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
+; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: trunc_sub_const_v8i64_v8i16:
 ; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
@@ -1422,26 +1447,28 @@ define <8 x i16> @trunc_sub_const_v8i64_
 ; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-SLOW-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-SLOW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-SLOW-NEXT:    vzeroupper
 ; AVX2-SLOW-NEXT:    retq
 ;
 ; AVX2-FAST-LABEL: trunc_sub_const_v8i64_v8i16:
 ; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
 ; AVX2-FAST-NEXT:    vpermd %ymm0, %ymm2, %ymm0
 ; AVX2-FAST-NEXT:    vpermd %ymm1, %ymm2, %ymm1
 ; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-FAST-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-FAST-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v8i64_v8i16:
 ; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsubq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
-; AVX512-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
@@ -1452,38 +1479,41 @@ define <8 x i16> @trunc_sub_const_v8i64_
 define <8 x i16> @trunc_sub_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v8i32_v8i16:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm0
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm1
 ; SSE-NEXT:    pslld $16, %xmm1
 ; SSE-NEXT:    psrad $16, %xmm1
 ; SSE-NEXT:    pslld $16, %xmm0
 ; SSE-NEXT:    psrad $16, %xmm0
 ; SSE-NEXT:    packssdw %xmm1, %xmm0
-; SSE-NEXT:    psubw {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v8i32_v8i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: trunc_sub_const_v8i32_v8i16:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsubd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v8i32_v8i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vpsubd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
-; AVX512-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1494,6 +1524,17 @@ define <8 x i16> @trunc_sub_const_v8i32_
 define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v16i64_v16i8:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    movl $1, %eax
+; SSE-NEXT:    movq %rax, %xmm8
+; SSE-NEXT:    pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    psubq %xmm8, %xmm0
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm2
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm3
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm4
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm5
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm6
+; SSE-NEXT:    psubq {{.*}}(%rip), %xmm7
 ; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
 ; SSE-NEXT:    pand %xmm8, %xmm7
 ; SSE-NEXT:    pand %xmm8, %xmm6
@@ -1510,38 +1551,51 @@ define <16 x i8> @trunc_sub_const_v16i64
 ; SSE-NEXT:    packuswb %xmm1, %xmm0
 ; SSE-NEXT:    packuswb %xmm2, %xmm0
 ; SSE-NEXT:    packuswb %xmm4, %xmm0
-; SSE-NEXT:    psubb {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v16i64_v16i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321]
-; AVX1-NEXT:    # xmm5 = mem[0,0]
-; AVX1-NEXT:    vandpd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandpd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpackusdw %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vandpd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vandpd %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpackusdw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    movl $1, %eax
+; AVX1-NEXT:    vmovq %rax, %xmm4
+; AVX1-NEXT:    vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
+; AVX1-NEXT:    vpsubq %xmm4, %xmm0, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm2, %xmm6
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm3, %xmm7
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [1.2598673968951787E-321,1.2598673968951787E-321]
+; AVX1-NEXT:    # xmm4 = mem[0,0]
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpand %xmm4, %xmm7, %xmm7
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm7, %xmm3
+; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpand %xmm4, %xmm6, %xmm6
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm6, %xmm2
 ; AVX1-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vandpd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandpd %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vandpd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vandpd %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm8, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-SLOW-LABEL: trunc_sub_const_v16i64_v16i8:
 ; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-SLOW-NEXT:    vpsubq {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
@@ -1561,12 +1615,15 @@ define <16 x i8> @trunc_sub_const_v16i64
 ; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-SLOW-NEXT:    vpand %xmm4, %xmm0, %xmm0
 ; AVX2-SLOW-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-SLOW-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-SLOW-NEXT:    vzeroupper
 ; AVX2-SLOW-NEXT:    retq
 ;
 ; AVX2-FAST-LABEL: trunc_sub_const_v16i64_v16i8:
 ; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-FAST-NEXT:    vpsubq {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
 ; AVX2-FAST-NEXT:    vpermd %ymm2, %ymm4, %ymm2
 ; AVX2-FAST-NEXT:    vpermd %ymm3, %ymm4, %ymm3
@@ -1583,17 +1640,17 @@ define <16 x i8> @trunc_sub_const_v16i64
 ; AVX2-FAST-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-FAST-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX2-FAST-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-FAST-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v16i64_v16i8:
 ; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsubq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512-NEXT:    vpsubq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
 ; AVX512-NEXT:    vpmovqd %zmm1, %ymm1
 ; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
@@ -1604,6 +1661,10 @@ define <16 x i8> @trunc_sub_const_v16i64
 define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v16i32_v16i8:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm0
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm1
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm2
+; SSE-NEXT:    psubd {{.*}}(%rip), %xmm3
 ; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
 ; SSE-NEXT:    pand %xmm4, %xmm3
 ; SSE-NEXT:    pand %xmm4, %xmm2
@@ -1612,27 +1673,31 @@ define <16 x i8> @trunc_sub_const_v16i32
 ; SSE-NEXT:    pand %xmm4, %xmm0
 ; SSE-NEXT:    packuswb %xmm1, %xmm0
 ; SSE-NEXT:    packuswb %xmm2, %xmm0
-; SSE-NEXT:    psubb {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v16i32_v16i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm1, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
+; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: trunc_sub_const_v16i32_v16i8:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsubd {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpsubd {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
@@ -1642,14 +1707,13 @@ define <16 x i8> @trunc_sub_const_v16i32
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: trunc_sub_const_v16i32_v16i8:
 ; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsubd {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = sub <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1660,52 +1724,56 @@ define <16 x i8> @trunc_sub_const_v16i32
 define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
 ; SSE-LABEL: trunc_sub_const_v16i16_v16i8:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    psubw {{.*}}(%rip), %xmm0
+; SSE-NEXT:    psubw {{.*}}(%rip), %xmm1
 ; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
 ; SSE-NEXT:    pand %xmm2, %xmm1
 ; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    packuswb %xmm1, %xmm0
-; SSE-NEXT:    psubb {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpackuswb %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: trunc_sub_const_v16i16_v16i8:
 ; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
   %1 = sub <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>

Modified: llvm/trunk/test/CodeGen/X86/xchg-nofold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xchg-nofold.ll?rev=348604&r1=348603&r2=348604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xchg-nofold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xchg-nofold.ll Fri Dec  7 07:47:52 2018
@@ -17,7 +17,7 @@ define zeroext i1 @_Z3fooRSt6atomicIbEb(
 ; CHECK-NEXT:    je .LBB0_3
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    movl %edi, %edx
-; CHECK-NEXT:    andb $7, %dl
+; CHECK-NEXT:    andl $7, %edx
 ; CHECK-NEXT:    cmpb %cl, %dl
 ; CHECK-NEXT:    jge .LBB0_2
 ; CHECK-NEXT:  .LBB0_3:
