[llvm] 40a50f8 - [x86] avoid false dependency stall on 'sbb' with same source reg

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 7 07:13:00 PST 2022


Author: Sanjay Patel
Date: 2022-02-07T10:12:12-05:00
New Revision: 40a50f8701a99a063a9950fc0a41f46934e4e160

URL: https://github.com/llvm/llvm-project/commit/40a50f8701a99a063a9950fc0a41f46934e4e160
DIFF: https://github.com/llvm/llvm-project/commit/40a50f8701a99a063a9950fc0a41f46934e4e160.diff

LOG: [x86] avoid false dependency stall on 'sbb' with same source reg

This is effectively inverting the transform added with D116804
because the downside of the false dependency of something like
"sbb %eax, %eax" is much greater than the upside of eliminating
a zeroing instruction on (all?) Intel CPUs.

Differential Revision: https://reviews.llvm.org/D118843

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86.td
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/lib/Target/X86/X86Subtarget.h
    llvm/test/CodeGen/X86/combine-movmsk-avx.ll
    llvm/test/CodeGen/X86/copy-eflags.ll
    llvm/test/CodeGen/X86/jump_sign.ll
    llvm/test/CodeGen/X86/machine-cse.ll
    llvm/test/CodeGen/X86/pr32588.ll
    llvm/test/CodeGen/X86/pr35972.ll
    llvm/test/CodeGen/X86/sbb-false-dep.ll
    llvm/test/CodeGen/X86/sbb-zero-idiom.ll
    llvm/test/CodeGen/X86/sbb.ll
    llvm/test/CodeGen/X86/sdiv_fix_sat.ll
    llvm/test/CodeGen/X86/select.ll
    llvm/test/CodeGen/X86/sext-i1.ll
    llvm/test/CodeGen/X86/shl-crash-on-legalize.ll
    llvm/test/CodeGen/X86/umul_fix_sat.ll
    llvm/test/CodeGen/X86/vec_uaddo.ll
    llvm/test/CodeGen/X86/vec_usubo.ll
    llvm/test/CodeGen/X86/vector-compare-any_of.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 8e87481f6a950..2a23e99715f09 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -445,6 +445,10 @@ def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
                                      "HasLZCNTFalseDeps", "true",
                                      "LZCNT/TZCNT have a false dependency on dest register">;
 
+def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
+                                     "HasSBBDepBreaking", "true",
+                                     "SBB with same register has no source dependency">;
+
 // On recent X86 (port bound) processors, its preferable to combine to a single shuffle
 // using a variable mask over multiple fixed shuffles.
 def TuningFastVariableCrossLaneShuffle
@@ -1032,6 +1036,7 @@ def ProcessorFeatures {
                                               Feature64Bit];
   list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
                                             TuningSlowSHLD,
+                                            TuningSBBDepBreaking,
                                             TuningInsertVZEROUPPER];
 
   // Bobcat
@@ -1053,6 +1058,7 @@ def ProcessorFeatures {
                                          TuningFastScalarShiftMasks,
                                          TuningFastVectorShiftMasks,
                                          TuningSlowSHLD,
+                                         TuningSBBDepBreaking,
                                          TuningInsertVZEROUPPER];
 
   // Jaguar
@@ -1072,6 +1078,7 @@ def ProcessorFeatures {
                                          TuningFastScalarShiftMasks,
                                          TuningFastVectorShiftMasks,
                                          TuningFastMOVBE,
+                                         TuningSBBDepBreaking,
                                          TuningSlowSHLD];
   list<SubtargetFeature> BtVer2Features =
     !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1099,6 +1106,7 @@ def ProcessorFeatures {
                                          TuningFast11ByteNOP,
                                          TuningFastScalarShiftMasks,
                                          TuningBranchFusion,
+                                         TuningSBBDepBreaking,
                                          TuningInsertVZEROUPPER];
 
   // PileDriver
@@ -1174,6 +1182,7 @@ def ProcessorFeatures {
                                      TuningFastScalarShiftMasks,
                                      TuningFastMOVBE,
                                      TuningSlowSHLD,
+                                     TuningSBBDepBreaking,
                                      TuningInsertVZEROUPPER];
   list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
                                                   FeatureRDPID,
@@ -1445,7 +1454,7 @@ foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
   def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
                  FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
-                 TuningInsertVZEROUPPER]>;
+                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
 }
 
 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
@@ -1453,7 +1462,7 @@ foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
                  FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
                  Feature64Bit],
                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
-                 TuningInsertVZEROUPPER]>;
+                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
 }
 
 foreach P = ["amdfam10", "barcelona"] in {

diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0c3cfaa1e61e2..0d697f4fcafde 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -464,8 +464,13 @@ namespace {
       }
 
       // Copy flags to the EFLAGS register and glue it to next node.
-      SDValue EFLAGS = CurDAG->getCopyToReg(
-          CurDAG->getEntryNode(), dl, X86::EFLAGS, N->getOperand(2), SDValue());
+      unsigned Opcode = N->getOpcode();
+      assert(Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY &&
+             "Unexpected opcode for SBB materialization");
+      unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1;
+      SDValue EFLAGS =
+          CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
+                               N->getOperand(FlagOpIndex), SDValue());
 
       // Create a 64-bit instruction if the result is 64-bits otherwise use the
       // 32-bit version.
@@ -5801,21 +5806,28 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     break;
 
   case X86ISD::SETCC_CARRY: {
-    // We have to do this manually because tblgen will put the eflags copy in
-    // the wrong place if we use an extract_subreg in the pattern.
     MVT VT = Node->getSimpleValueType(0);
+    SDValue Result;
+    if (Subtarget->hasSBBDepBreaking()) {
+      // We have to do this manually because tblgen will put the eflags copy in
+      // the wrong place if we use an extract_subreg in the pattern.
+      // Copy flags to the EFLAGS register and glue it to next node.
+      SDValue EFLAGS =
+          CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
+                               Node->getOperand(1), SDValue());
 
-    // Copy flags to the EFLAGS register and glue it to next node.
-    SDValue EFLAGS =
-        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
-                             Node->getOperand(1), SDValue());
-
-    // Create a 64-bit instruction if the result is 64-bits otherwise use the
-    // 32-bit version.
-    unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
-    MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
-    SDValue Result = SDValue(
-        CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
+      // Create a 64-bit instruction if the result is 64-bits otherwise use the
+      // 32-bit version.
+      unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
+      MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
+      Result = SDValue(
+          CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)),
+          0);
+    } else {
+      // The target does not recognize sbb with the same reg operand as a
+      // no-source idiom, so we explicitly zero the input values.
+      Result = getSBBZero(Node);
+    }
 
     // For less than 32-bits we need to extract from the 32-bit node.
     if (VT == MVT::i8 || VT == MVT::i16) {

diff  --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 5d773f0c57dfb..d1ff9445e4790 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -246,6 +246,10 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   /// True if LZCNT/TZCNT instructions have a false dependency on the destination register.
   bool HasLZCNTFalseDeps = false;
 
+  /// True if an SBB instruction with same source register is recognized as
+  /// having no dependency on that register.
+  bool HasSBBDepBreaking = false;
+
   /// True if its preferable to combine to a single cross-lane shuffle
   /// using a variable mask over multiple fixed shuffles.
   bool HasFastVariableCrossLaneShuffle = false;
@@ -719,6 +723,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
   bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
+  bool hasSBBDepBreaking() const { return HasSBBDepBreaking; }
   bool hasFastVariableCrossLaneShuffle() const {
     return HasFastVariableCrossLaneShuffle;
   }

diff  --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index ca0e8db5db03e..3277c89a3e346 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -139,8 +139,9 @@ define i32 @movmskps_concat_v4f32(<4 x float> %a0, <4 x float> %a1)  {
 ; CHECK-LABEL: movmskps_concat_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vmovmskps %xmm0, %eax
-; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    vmovmskps %xmm0, %ecx
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negl %ecx
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -153,9 +154,10 @@ define i32 @movmskps_concat_v4f32(<4 x float> %a0, <4 x float> %a1)  {
 define i32 @movmskps_demanded_concat_v4f32(<4 x float> %a0, <4 x float> %a1)  {
 ; CHECK-LABEL: movmskps_demanded_concat_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovmskps %xmm0, %eax
-; CHECK-NEXT:    andl $3, %eax
-; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    vmovmskps %xmm0, %ecx
+; CHECK-NEXT:    andl $3, %ecx
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negl %ecx
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

diff  --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index 4d382b24a2a5b..4c17cd52065e4 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -293,6 +293,7 @@ bb1:
 define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
 ; X32-LABEL: PR37431:
 ; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
@@ -302,10 +303,11 @@ define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT:    movl (%edi), %edi
-; X32-NEXT:    movl %edi, %ebx
-; X32-NEXT:    sarl $31, %ebx
+; X32-NEXT:    movl %edi, %ebp
+; X32-NEXT:    sarl $31, %ebp
+; X32-NEXT:    xorl %ebx, %ebx
 ; X32-NEXT:    cmpl %edi, {{[0-9]+}}(%esp)
-; X32-NEXT:    sbbl %ebx, %esi
+; X32-NEXT:    sbbl %ebp, %esi
 ; X32-NEXT:    sbbl %ebx, %ebx
 ; X32-NEXT:    movb %bl, (%edx)
 ; X32-NEXT:    cltd
@@ -314,6 +316,7 @@ define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
 ; X32-NEXT:    popl %ebx
+; X32-NEXT:    popl %ebp
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR37431:
@@ -321,6 +324,7 @@ define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64
 ; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movslq (%rdi), %rdx
+; X64-NEXT:    xorl %edi, %edi
 ; X64-NEXT:    cmpq %rdx, %r8
 ; X64-NEXT:    sbbl %edi, %edi
 ; X64-NEXT:    movb %dil, (%rsi)

diff  --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index 848ebc97a1ac9..7e6b462fcd827 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -310,6 +310,7 @@ define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
 ; CHECK-LABEL: func_q:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    sbbl %ecx, %ecx
 ; CHECK-NEXT:    negl %eax

diff  --git a/llvm/test/CodeGen/X86/machine-cse.ll b/llvm/test/CodeGen/X86/machine-cse.ll
index e989a782fad7b..b7cd5c913ff13 100644
--- a/llvm/test/CodeGen/X86/machine-cse.ll
+++ b/llvm/test/CodeGen/X86/machine-cse.ll
@@ -112,6 +112,8 @@ define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    cmpl %esi, %edi
 ; CHECK-NEXT:    ja .LBB2_2
 ; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:  .LBB2_2: # %return
 ; CHECK-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/pr32588.ll b/llvm/test/CodeGen/X86/pr32588.ll
index 9e6f0b2881d94..8f2e21910cc6d 100644
--- a/llvm/test/CodeGen/X86/pr32588.ll
+++ b/llvm/test/CodeGen/X86/pr32588.ll
@@ -8,6 +8,7 @@
 define void @fn1() {
 ; CHECK-LABEL: fn1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $1, c(%rip)
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    andl $1, %eax

diff  --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index 09363fbc89bba..e7e60666d5bcf 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -5,6 +5,7 @@ define void @test3(i32 %c, <64 x i1>* %ptr) {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    sbbl %ecx, %ecx
 ; CHECK-NEXT:    kmovd %ecx, %k0

diff  --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll
index 336e56c45969d..204b215a89a5e 100644
--- a/llvm/test/CodeGen/X86/sbb-false-dep.ll
+++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64--                          | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sbb-dep-breaking | FileCheck %s --check-prefixes=IDIOM
 
 %struct.y_s = type { i64*, i64* }
 
@@ -24,13 +25,15 @@ define i32 @mallocbench_gs(i32* noundef %0, %struct.y_s* noundef %1, i32 noundef
 ; CHECK-NEXT:    callq foo1@PLT
 ; CHECK-NEXT:    movq 8(%rbx), %rax
 ; CHECK-NEXT:    movq (%rax), %rdx
+; CHECK-NEXT:    xorl %ebp, %ebp
 ; CHECK-NEXT:    movl %r13d, %ecx
 ; CHECK-NEXT:    negl %ecx
-; CHECK-NEXT:    sbbq %rbp, %rbp
-; CHECK-NEXT:    orq %rdx, %rbp
-; CHECK-NEXT:    cmpl $1, %r13d
+; CHECK-NEXT:    movl $0, %eax
 ; CHECK-NEXT:    sbbq %rax, %rax
 ; CHECK-NEXT:    orq %rdx, %rax
+; CHECK-NEXT:    cmpl $1, %r13d
+; CHECK-NEXT:    sbbq %rbp, %rbp
+; CHECK-NEXT:    orq %rdx, %rbp
 ; CHECK-NEXT:    subq $8, %rsp
 ; CHECK-NEXT:    movq %r12, %rdi
 ; CHECK-NEXT:    movl %r15d, %esi
@@ -38,8 +41,8 @@ define i32 @mallocbench_gs(i32* noundef %0, %struct.y_s* noundef %1, i32 noundef
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    xorl %r8d, %r8d
 ; CHECK-NEXT:    xorl %r9d, %r9d
-; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    callq foo2@PLT
 ; CHECK-NEXT:    addq $40, %rsp
@@ -50,6 +53,53 @@ define i32 @mallocbench_gs(i32* noundef %0, %struct.y_s* noundef %1, i32 noundef
 ; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    retq
+;
+; IDIOM-LABEL: mallocbench_gs:
+; IDIOM:       # %bb.0:
+; IDIOM-NEXT:    pushq %rbp
+; IDIOM-NEXT:    pushq %r15
+; IDIOM-NEXT:    pushq %r14
+; IDIOM-NEXT:    pushq %r13
+; IDIOM-NEXT:    pushq %r12
+; IDIOM-NEXT:    pushq %rbx
+; IDIOM-NEXT:    pushq %rax
+; IDIOM-NEXT:    movl %r8d, %r13d
+; IDIOM-NEXT:    movl %ecx, %r14d
+; IDIOM-NEXT:    movl %edx, %r15d
+; IDIOM-NEXT:    movq %rsi, %rbx
+; IDIOM-NEXT:    movq %rdi, %r12
+; IDIOM-NEXT:    movq (%rsi), %rdi
+; IDIOM-NEXT:    movq 8(%rsi), %rsi
+; IDIOM-NEXT:    movq %rbx, %rdx
+; IDIOM-NEXT:    callq foo1@PLT
+; IDIOM-NEXT:    movq 8(%rbx), %rax
+; IDIOM-NEXT:    movq (%rax), %rdx
+; IDIOM-NEXT:    movl %r13d, %ecx
+; IDIOM-NEXT:    negl %ecx
+; IDIOM-NEXT:    sbbq %rbp, %rbp
+; IDIOM-NEXT:    orq %rdx, %rbp
+; IDIOM-NEXT:    cmpl $1, %r13d
+; IDIOM-NEXT:    sbbq %rax, %rax
+; IDIOM-NEXT:    orq %rdx, %rax
+; IDIOM-NEXT:    subq $8, %rsp
+; IDIOM-NEXT:    movq %r12, %rdi
+; IDIOM-NEXT:    movl %r15d, %esi
+; IDIOM-NEXT:    movl %r14d, %edx
+; IDIOM-NEXT:    xorl %ecx, %ecx
+; IDIOM-NEXT:    xorl %r8d, %r8d
+; IDIOM-NEXT:    xorl %r9d, %r9d
+; IDIOM-NEXT:    pushq %rax
+; IDIOM-NEXT:    pushq %rbp
+; IDIOM-NEXT:    pushq %rbx
+; IDIOM-NEXT:    callq foo2@PLT
+; IDIOM-NEXT:    addq $40, %rsp
+; IDIOM-NEXT:    popq %rbx
+; IDIOM-NEXT:    popq %r12
+; IDIOM-NEXT:    popq %r13
+; IDIOM-NEXT:    popq %r14
+; IDIOM-NEXT:    popq %r15
+; IDIOM-NEXT:    popq %rbp
+; IDIOM-NEXT:    retq
   %6 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 0
   %7 = load i64*, i64** %6, align 8
   %8 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 1

diff  --git a/llvm/test/CodeGen/X86/sbb-zero-idiom.ll b/llvm/test/CodeGen/X86/sbb-zero-idiom.ll
index 7baa937aedec2..964e91b6f4a86 100644
--- a/llvm/test/CodeGen/X86/sbb-zero-idiom.ll
+++ b/llvm/test/CodeGen/X86/sbb-zero-idiom.ll
@@ -1,18 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64--                   | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake     | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=k8          | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1      | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2      | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3      | FileCheck %s --check-prefixes=CHECK
+
+; Check the attribute.
+
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-sbb-dep-breaking | FileCheck %s --check-prefixes=ZERO
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sbb-dep-breaking | FileCheck %s --check-prefixes=IDIOM
+
+; And check that CPUs have included the attribute as expected.
+
+; RUN: llc < %s -mtriple=x86_64--                   | FileCheck %s --check-prefixes=ZERO
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=ZERO
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake     | FileCheck %s --check-prefixes=ZERO
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=k8          | FileCheck %s --check-prefixes=IDIOM
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1      | FileCheck %s --check-prefixes=IDIOM
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2      | FileCheck %s --check-prefixes=IDIOM
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3      | FileCheck %s --check-prefixes=IDIOM
 
 define i32 @i32_select_0_or_neg1(i32 %x) {
-; CHECK-LABEL: i32_select_0_or_neg1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    negl %edi
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    retq
+; ZERO-LABEL: i32_select_0_or_neg1:
+; ZERO:       # %bb.0:
+; ZERO-NEXT:    xorl %eax, %eax
+; ZERO-NEXT:    negl %edi
+; ZERO-NEXT:    sbbl %eax, %eax
+; ZERO-NEXT:    retq
+;
+; IDIOM-LABEL: i32_select_0_or_neg1:
+; IDIOM:       # %bb.0:
+; IDIOM-NEXT:    negl %edi
+; IDIOM-NEXT:    sbbl %eax, %eax
+; IDIOM-NEXT:    retq
   %cmp = icmp ne i32 %x, 0
   %sel = select i1 %cmp, i32 -1, i32 0
   ret i32 %sel

diff  --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll
index b3dae629ba808..78d609d3a17e6 100644
--- a/llvm/test/CodeGen/X86/sbb.ll
+++ b/llvm/test/CodeGen/X86/sbb.ll
@@ -8,6 +8,7 @@
 define i8 @i8_select_0_or_neg1(i8 %x) {
 ; CHECK-LABEL: i8_select_0_or_neg1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    negb %dil
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -22,6 +23,7 @@ define i8 @i8_select_0_or_neg1(i8 %x) {
 define i16 @i16_select_0_or_neg1_as_math(i16 %x) {
 ; CHECK-LABEL: i16_select_0_or_neg1_as_math:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    negw %di
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -37,6 +39,7 @@ define i16 @i16_select_0_or_neg1_as_math(i16 %x) {
 define i32 @i32_select_0_or_neg1_commuted(i32 %x) {
 ; CHECK-LABEL: i32_select_0_or_neg1_commuted:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    negl %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
@@ -50,6 +53,7 @@ define i32 @i32_select_0_or_neg1_commuted(i32 %x) {
 define i64 @i64_select_0_or_neg1_commuted_as_math(i64 %x) {
 ; CHECK-LABEL: i64_select_0_or_neg1_commuted_as_math:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    negq %rdi
 ; CHECK-NEXT:    sbbq %rax, %rax
 ; CHECK-NEXT:    retq
@@ -64,6 +68,7 @@ define i64 @i64_select_0_or_neg1_commuted_as_math(i64 %x) {
 define i64 @i64_select_neg1_or_0(i64 %x) {
 ; CHECK-LABEL: i64_select_neg1_or_0:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpq $1, %rdi
 ; CHECK-NEXT:    sbbq %rax, %rax
 ; CHECK-NEXT:    retq
@@ -77,6 +82,7 @@ define i64 @i64_select_neg1_or_0(i64 %x) {
 define i32 @i32_select_neg1_or_0_as_math(i32 %x) {
 ; CHECK-LABEL: i32_select_neg1_or_0_as_math:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $1, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
@@ -91,6 +97,7 @@ define i32 @i32_select_neg1_or_0_as_math(i32 %x) {
 define i16 @i16_select_neg1_or_0_commuted(i16 %x) {
 ; CHECK-LABEL: i16_select_neg1_or_0_commuted:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpw $1, %di
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -105,6 +112,7 @@ define i16 @i16_select_neg1_or_0_commuted(i16 %x) {
 define i8 @i8_select_neg1_or_0_commuted_as_math(i8 %x) {
 ; CHECK-LABEL: i8_select_neg1_or_0_commuted_as_math:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpb $1, %dil
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -120,6 +128,7 @@ define i8 @i8_select_neg1_or_0_commuted_as_math(i8 %x) {
 define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ult_select_neg1_or_0:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl %esi, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
@@ -134,6 +143,7 @@ define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind {
 define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ugt_select_neg1_or_0:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl %esi, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
@@ -148,6 +158,7 @@ define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind {
 define i32 @uge_select_0_or_neg1(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: uge_select_0_or_neg1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl %esi, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
@@ -163,6 +174,7 @@ define i32 @uge_select_0_or_neg1(i32 %x, i32 %y) nounwind {
 define i32 @ule_select_0_or_neg1(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ule_select_0_or_neg1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl %esi, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
@@ -178,6 +190,7 @@ define i32 @ule_select_0_or_neg1(i32 %x, i32 %y) nounwind {
 define i32 @uge_select_0_or_neg1_sub(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: uge_select_0_or_neg1_sub:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl %esi, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    retq
@@ -193,6 +206,7 @@ define i32 @uge_select_0_or_neg1_sub(i32 %x, i32 %y) nounwind {
 define i64 @ugt_select_neg1_or_0_sub(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: ugt_select_neg1_or_0_sub:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpq %rdi, %rsi
 ; CHECK-NEXT:    sbbq %rax, %rax
 ; CHECK-NEXT:    retq
@@ -208,6 +222,7 @@ define i64 @ugt_select_neg1_or_0_sub(i64 %x, i64 %y) nounwind {
 define i16 @ult_select_neg1_or_0_sub(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: ult_select_neg1_or_0_sub:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpw %di, %si
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -226,6 +241,7 @@ define i16 @ult_select_neg1_or_0_sub(i16 %x, i16 %y) nounwind {
 define void @PR33560(i8 %x, i64 %y) {
 ; CHECK-LABEL: PR33560:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    negb %dil
 ; CHECK-NEXT:    sbbq %rax, %rax
 ; CHECK-NEXT:    cmpq %rsi, %rax

diff  --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 0ba51f02bbb11..9b964b147d553 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -1219,6 +1219,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    andl %eax, %ebx
 ; X86-NEXT:    negl %eax
+; X86-NEXT:    movl $0, %ecx
 ; X86-NEXT:    sbbl %ecx, %ecx
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -1242,6 +1243,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    andl %eax, %edi
 ; X86-NEXT:    negl %eax
+; X86-NEXT:    movl $0, %eax
 ; X86-NEXT:    sbbl %eax, %eax
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1268,6 +1270,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-NEXT:    andl %eax, %edx
 ; X86-NEXT:    negl %eax
+; X86-NEXT:    movl $0, %eax
 ; X86-NEXT:    sbbl %eax, %eax
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1291,6 +1294,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-NEXT:    andl %eax, %edi
 ; X86-NEXT:    negl %eax
+; X86-NEXT:    movl $0, %eax
 ; X86-NEXT:    sbbl %eax, %eax
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload

diff  --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 81229e301d652..a7f41e0813297 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -629,21 +629,13 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
 ;; Test integer select between values and constants.
 
 define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test9:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    cmpq $1, %rdi
-; GENERIC-NEXT:    sbbq %rax, %rax
-; GENERIC-NEXT:    orq %rsi, %rax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: test9:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    cmpq $1, %rdi
-; ATOM-NEXT:    sbbq %rax, %rax
-; ATOM-NEXT:    orq %rsi, %rax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: test9:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $1, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: test9:
 ; ATHLON:       ## %bb.0:
@@ -677,21 +669,13 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 
 ;; Same as test9
 define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test9a:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    cmpq $1, %rdi
-; GENERIC-NEXT:    sbbq %rax, %rax
-; GENERIC-NEXT:    orq %rsi, %rax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: test9a:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    cmpq $1, %rdi
-; ATOM-NEXT:    sbbq %rax, %rax
-; ATOM-NEXT:    orq %rsi, %rax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: test9a:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $1, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: test9a:
 ; ATHLON:       ## %bb.0:
@@ -723,21 +707,13 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 }
 
 define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test9b:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    cmpq $1, %rdi
-; GENERIC-NEXT:    sbbq %rax, %rax
-; GENERIC-NEXT:    orq %rsi, %rax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: test9b:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    cmpq $1, %rdi
-; ATOM-NEXT:    sbbq %rax, %rax
-; ATOM-NEXT:    orq %rsi, %rax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: test9b:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $1, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: test9b:
 ; ATHLON:       ## %bb.0:
@@ -770,21 +746,13 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 
 ;; Select between -1 and 1.
 define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test10:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    cmpq $1, %rdi
-; GENERIC-NEXT:    sbbq %rax, %rax
-; GENERIC-NEXT:    orq $1, %rax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: test10:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    cmpq $1, %rdi
-; ATOM-NEXT:    sbbq %rax, %rax
-; ATOM-NEXT:    orq $1, %rax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: test10:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $1, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    orq $1, %rax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: test10:
 ; ATHLON:       ## %bb.0:
@@ -814,21 +782,13 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 }
 
 define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test11:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    negq %rdi
-; GENERIC-NEXT:    sbbq %rax, %rax
-; GENERIC-NEXT:    orq %rsi, %rax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: test11:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    negq %rdi
-; ATOM-NEXT:    sbbq %rax, %rax
-; ATOM-NEXT:    orq %rsi, %rax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: test11:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negq %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: test11:
 ; ATHLON:       ## %bb.0:
@@ -861,21 +821,13 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 }
 
 define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test11a:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    negq %rdi
-; GENERIC-NEXT:    sbbq %rax, %rax
-; GENERIC-NEXT:    orq %rsi, %rax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: test11a:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    negq %rdi
-; ATOM-NEXT:    sbbq %rax, %rax
-; ATOM-NEXT:    orq %rsi, %rax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: test11a:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negq %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: test11a:
 ; ATHLON:       ## %bb.0:
@@ -907,21 +859,13 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 }
 
 define i32 @eqzero_const_or_all_ones(i32 %x) {
-; GENERIC-LABEL: eqzero_const_or_all_ones:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    negl %edi
-; GENERIC-NEXT:    sbbl %eax, %eax
-; GENERIC-NEXT:    orl $42, %eax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: eqzero_const_or_all_ones:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    negl %edi
-; ATOM-NEXT:    sbbl %eax, %eax
-; ATOM-NEXT:    orl $42, %eax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: eqzero_const_or_all_ones:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    orl $42, %eax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: eqzero_const_or_all_ones:
 ; ATHLON:       ## %bb.0:
@@ -933,9 +877,11 @@ define i32 @eqzero_const_or_all_ones(i32 %x) {
 ;
 ; MCU-LABEL: eqzero_const_or_all_ones:
 ; MCU:       # %bb.0:
+; MCU-NEXT:    xorl %ecx, %ecx
 ; MCU-NEXT:    negl %eax
-; MCU-NEXT:    sbbl %eax, %eax
-; MCU-NEXT:    orl $42, %eax
+; MCU-NEXT:    sbbl %ecx, %ecx
+; MCU-NEXT:    orl $42, %ecx
+; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
   %z = icmp eq i32 %x, 0
   %r = select i1 %z, i32 42, i32 -1
@@ -943,24 +889,17 @@ define i32 @eqzero_const_or_all_ones(i32 %x) {
 }
 
 define i32 @nezero_const_or_all_ones(i32 %x) {
-; GENERIC-LABEL: nezero_const_or_all_ones:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    cmpl $1, %edi
-; GENERIC-NEXT:    sbbl %eax, %eax
-; GENERIC-NEXT:    orl $42, %eax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: nezero_const_or_all_ones:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    cmpl $1, %edi
-; ATOM-NEXT:    sbbl %eax, %eax
-; ATOM-NEXT:    orl $42, %eax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: nezero_const_or_all_ones:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    orl $42, %eax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: nezero_const_or_all_ones:
 ; ATHLON:       ## %bb.0:
+; ATHLON-NEXT:    xorl %eax, %eax
 ; ATHLON-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
 ; ATHLON-NEXT:    sbbl %eax, %eax
 ; ATHLON-NEXT:    orl $42, %eax
@@ -968,9 +907,11 @@ define i32 @nezero_const_or_all_ones(i32 %x) {
 ;
 ; MCU-LABEL: nezero_const_or_all_ones:
 ; MCU:       # %bb.0:
+; MCU-NEXT:    xorl %ecx, %ecx
 ; MCU-NEXT:    cmpl $1, %eax
-; MCU-NEXT:    sbbl %eax, %eax
-; MCU-NEXT:    orl $42, %eax
+; MCU-NEXT:    sbbl %ecx, %ecx
+; MCU-NEXT:    orl $42, %ecx
+; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
   %z = icmp ne i32 %x, 0
   %r = select i1 %z, i32 42, i32 -1
@@ -978,21 +919,13 @@ define i32 @nezero_const_or_all_ones(i32 %x) {
 }
 
 define i64 @eqzero_all_ones_or_const(i64 %x) {
-; GENERIC-LABEL: eqzero_all_ones_or_const:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    cmpq $1, %rdi
-; GENERIC-NEXT:    sbbq %rax, %rax
-; GENERIC-NEXT:    orq $42, %rax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: eqzero_all_ones_or_const:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    cmpq $1, %rdi
-; ATOM-NEXT:    sbbq %rax, %rax
-; ATOM-NEXT:    orq $42, %rax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: eqzero_all_ones_or_const:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $1, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    orq $42, %rax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: eqzero_all_ones_or_const:
 ; ATHLON:       ## %bb.0:
@@ -1022,23 +955,14 @@ define i64 @eqzero_all_ones_or_const(i64 %x) {
 }
 
 define i8 @nezero_all_ones_or_const(i8 %x) {
-; GENERIC-LABEL: nezero_all_ones_or_const:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    negb %dil
-; GENERIC-NEXT:    sbbl %eax, %eax
-; GENERIC-NEXT:    orb $42, %al
-; GENERIC-NEXT:    ## kill: def $al killed $al killed $eax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: nezero_all_ones_or_const:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    negb %dil
-; ATOM-NEXT:    sbbl %eax, %eax
-; ATOM-NEXT:    orb $42, %al
-; ATOM-NEXT:    ## kill: def $al killed $al killed $eax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: nezero_all_ones_or_const:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negb %dil
+; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    orb $42, %al
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: nezero_all_ones_or_const:
 ; ATHLON:       ## %bb.0:
@@ -1051,10 +975,11 @@ define i8 @nezero_all_ones_or_const(i8 %x) {
 ;
 ; MCU-LABEL: nezero_all_ones_or_const:
 ; MCU:       # %bb.0:
+; MCU-NEXT:    xorl %ecx, %ecx
 ; MCU-NEXT:    negb %al
-; MCU-NEXT:    sbbl %eax, %eax
-; MCU-NEXT:    orb $42, %al
-; MCU-NEXT:    # kill: def $al killed $al killed $eax
+; MCU-NEXT:    sbbl %ecx, %ecx
+; MCU-NEXT:    orb $42, %cl
+; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
   %z = icmp ne i8 %x, 0
   %r = select i1 %z, i8 -1, i8 42
@@ -1062,21 +987,13 @@ define i8 @nezero_all_ones_or_const(i8 %x) {
 }
 
 define i32 @PR53006(i32 %x) {
-; GENERIC-LABEL: PR53006:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    negl %edi
-; GENERIC-NEXT:    sbbl %eax, %eax
-; GENERIC-NEXT:    orl $1, %eax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: PR53006:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    negl %edi
-; ATOM-NEXT:    sbbl %eax, %eax
-; ATOM-NEXT:    orl $1, %eax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: PR53006:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    orl $1, %eax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: PR53006:
 ; ATHLON:       ## %bb.0:
@@ -1088,9 +1005,11 @@ define i32 @PR53006(i32 %x) {
 ;
 ; MCU-LABEL: PR53006:
 ; MCU:       # %bb.0:
+; MCU-NEXT:    xorl %ecx, %ecx
 ; MCU-NEXT:    negl %eax
-; MCU-NEXT:    sbbl %eax, %eax
-; MCU-NEXT:    orl $1, %eax
+; MCU-NEXT:    sbbl %ecx, %ecx
+; MCU-NEXT:    orl $1, %ecx
+; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
   %z = icmp eq i32 %x, 0
   %r = select i1 %z, i32 1, i32 -1
@@ -1100,31 +1019,34 @@ define i32 @PR53006(i32 %x) {
 define i32 @test13(i32 %a, i32 %b) nounwind {
 ; GENERIC-LABEL: test13:
 ; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    xorl %eax, %eax
 ; GENERIC-NEXT:    cmpl %esi, %edi
 ; GENERIC-NEXT:    sbbl %eax, %eax
 ; GENERIC-NEXT:    retq
 ;
 ; ATOM-LABEL: test13:
 ; ATOM:       ## %bb.0:
+; ATOM-NEXT:    xorl %eax, %eax
 ; ATOM-NEXT:    cmpl %esi, %edi
 ; ATOM-NEXT:    sbbl %eax, %eax
 ; ATOM-NEXT:    nop
 ; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
 ; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test13:
 ; ATHLON:       ## %bb.0:
-; ATHLON-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; ATHLON-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; ATHLON-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; ATHLON-NEXT:    xorl %eax, %eax
+; ATHLON-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
 ; ATHLON-NEXT:    sbbl %eax, %eax
 ; ATHLON-NEXT:    retl
 ;
 ; MCU-LABEL: test13:
 ; MCU:       # %bb.0:
+; MCU-NEXT:    xorl %ecx, %ecx
 ; MCU-NEXT:    cmpl %edx, %eax
-; MCU-NEXT:    sbbl %eax, %eax
+; MCU-NEXT:    sbbl %ecx, %ecx
+; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
   %c = icmp ult i32 %a, %b
   %d = sext i1 %c to i32
@@ -1172,18 +1094,18 @@ define i32 @test14(i32 %a, i32 %b) nounwind {
 define i32 @test15(i32 %x) nounwind {
 ; GENERIC-LABEL: test15:
 ; GENERIC:       ## %bb.0: ## %entry
+; GENERIC-NEXT:    xorl %eax, %eax
 ; GENERIC-NEXT:    negl %edi
 ; GENERIC-NEXT:    sbbl %eax, %eax
 ; GENERIC-NEXT:    retq
 ;
 ; ATOM-LABEL: test15:
 ; ATOM:       ## %bb.0: ## %entry
+; ATOM-NEXT:    xorl %eax, %eax
 ; ATOM-NEXT:    negl %edi
 ; ATOM-NEXT:    sbbl %eax, %eax
 ; ATOM-NEXT:    nop
 ; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
 ; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test15:
@@ -1195,8 +1117,10 @@ define i32 @test15(i32 %x) nounwind {
 ;
 ; MCU-LABEL: test15:
 ; MCU:       # %bb.0: # %entry
+; MCU-NEXT:    xorl %ecx, %ecx
 ; MCU-NEXT:    negl %eax
-; MCU-NEXT:    sbbl %eax, %eax
+; MCU-NEXT:    sbbl %ecx, %ecx
+; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
   %cmp = icmp ne i32 %x, 0
@@ -1207,18 +1131,18 @@ entry:
 define i64 @test16(i64 %x) nounwind uwtable readnone ssp {
 ; GENERIC-LABEL: test16:
 ; GENERIC:       ## %bb.0: ## %entry
+; GENERIC-NEXT:    xorl %eax, %eax
 ; GENERIC-NEXT:    negq %rdi
 ; GENERIC-NEXT:    sbbq %rax, %rax
 ; GENERIC-NEXT:    retq
 ;
 ; ATOM-LABEL: test16:
 ; ATOM:       ## %bb.0: ## %entry
+; ATOM-NEXT:    xorl %eax, %eax
 ; ATOM-NEXT:    negq %rdi
 ; ATOM-NEXT:    sbbq %rax, %rax
 ; ATOM-NEXT:    nop
 ; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
 ; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test16:
@@ -1249,6 +1173,7 @@ entry:
 define i16 @test17(i16 %x) nounwind {
 ; GENERIC-LABEL: test17:
 ; GENERIC:       ## %bb.0: ## %entry
+; GENERIC-NEXT:    xorl %eax, %eax
 ; GENERIC-NEXT:    negw %di
 ; GENERIC-NEXT:    sbbl %eax, %eax
 ; GENERIC-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1256,13 +1181,12 @@ define i16 @test17(i16 %x) nounwind {
 ;
 ; ATOM-LABEL: test17:
 ; ATOM:       ## %bb.0: ## %entry
+; ATOM-NEXT:    xorl %eax, %eax
 ; ATOM-NEXT:    negw %di
 ; ATOM-NEXT:    sbbl %eax, %eax
 ; ATOM-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; ATOM-NEXT:    nop
 ; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
 ; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test17:
@@ -1275,9 +1199,10 @@ define i16 @test17(i16 %x) nounwind {
 ;
 ; MCU-LABEL: test17:
 ; MCU:       # %bb.0: # %entry
+; MCU-NEXT:    xorl %ecx, %ecx
 ; MCU-NEXT:    negw %ax
-; MCU-NEXT:    sbbl %eax, %eax
-; MCU-NEXT:    # kill: def $ax killed $ax killed $eax
+; MCU-NEXT:    sbbl %ecx, %ecx
+; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
 entry:
   %cmp = icmp ne i16 %x, 0

diff  --git a/llvm/test/CodeGen/X86/sext-i1.ll b/llvm/test/CodeGen/X86/sext-i1.ll
index acf9e8138a0c0..03799af1e70a4 100644
--- a/llvm/test/CodeGen/X86/sext-i1.ll
+++ b/llvm/test/CodeGen/X86/sext-i1.ll
@@ -8,12 +8,14 @@
 define i32 @t1(i32 %x) nounwind readnone ssp {
 ; X32-LABEL: t1:
 ; X32:       # %bb.0:
+; X32-NEXT:    xorl %eax, %eax
 ; X32-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
 ; X32-NEXT:    sbbl %eax, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: t1:
 ; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpl $1, %edi
 ; X64-NEXT:    sbbl %eax, %eax
 ; X64-NEXT:    retq
@@ -25,12 +27,14 @@ define i32 @t1(i32 %x) nounwind readnone ssp {
 define i32 @t2(i32 %x) nounwind readnone ssp {
 ; X32-LABEL: t2:
 ; X32:       # %bb.0:
+; X32-NEXT:    xorl %eax, %eax
 ; X32-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
 ; X32-NEXT:    sbbl %eax, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: t2:
 ; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpl $1, %edi
 ; X64-NEXT:    sbbl %eax, %eax
 ; X64-NEXT:    retq
@@ -43,6 +47,7 @@ define i32 @t3(i32 %x, i64 %y) nounwind readonly {
 ; X32-LABEL: t3:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
 ; X32-NEXT:    sbbl %ecx, %ecx
 ; X32-NEXT:    cmpl %ecx, {{[0-9]+}}(%esp)
@@ -87,6 +92,7 @@ define i32 @t4(i64 %x) nounwind readnone ssp {
 ;
 ; X64-LABEL: t4:
 ; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq $1, %rdi
 ; X64-NEXT:    sbbl %eax, %eax
 ; X64-NEXT:    retq
@@ -98,6 +104,7 @@ define i32 @t4(i64 %x) nounwind readnone ssp {
 define i64 @t5(i32 %x) nounwind readnone ssp {
 ; X32-LABEL: t5:
 ; X32:       # %bb.0:
+; X32-NEXT:    xorl %eax, %eax
 ; X32-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
 ; X32-NEXT:    sbbl %eax, %eax
 ; X32-NEXT:    movl %eax, %edx
@@ -105,6 +112,7 @@ define i64 @t5(i32 %x) nounwind readnone ssp {
 ;
 ; X64-LABEL: t5:
 ; X64:       # %bb.0:
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpl $1, %edi
 ; X64-NEXT:    sbbq %rax, %rax
 ; X64-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll b/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll
index 66ce60a9b22c5..0168fd90a95de 100644
--- a/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll
+++ b/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll
@@ -14,6 +14,7 @@ define i32 @PR29058(i8 %x, i32 %y) {
 ; CHECK-NEXT:    testb %dil, %dil
 ; CHECK-NEXT:    movl $2147483646, %eax # imm = 0x7FFFFFFE
 ; CHECK-NEXT:    cmovnel %esi, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    cmpb $1, %dil
 ; CHECK-NEXT:    sbbl %ecx, %ecx
 ; CHECK-NEXT:    orb %sil, %cl

diff  --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index cdb5480c7f614..504557242c305 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -443,29 +443,30 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    mull %ebp
 ; X86-NEXT:    movl %edx, %ecx
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    movl %ebx, %eax
-; X86-NEXT:    mull %edi
-; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %edx, %edi
 ; X86-NEXT:    adcl $0, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mull %ebp
-; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    mull %edi
-; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %edi, %eax
 ; X86-NEXT:    adcl %ecx, %edx
-; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %esi
 ; X86-NEXT:    addl %ebp, %edx
-; X86-NEXT:    adcl $0, %ebx
-; X86-NEXT:    negl %ebx
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    negl %esi
 ; X86-NEXT:    sbbl %ecx, %ecx
 ; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    orl %ecx, %edx
@@ -521,11 +522,12 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    shrdl $31, %edx, %eax
 ; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    shrl $31, %esi
+; X86-NEXT:    xorl %edi, %edi
 ; X86-NEXT:    negl %esi
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    sbbl %edi, %edi
+; X86-NEXT:    orl %edi, %eax
 ; X86-NEXT:    shrdl $31, %ecx, %edx
-; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    orl %edi, %edx
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx

diff  --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 73d71135a9332..4a9bfd3b6f0e2 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -26,6 +26,7 @@ declare {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128>, <2 x
 define <1 x i32> @uaddo_v1i32(<1 x i32> %a0, <1 x i32> %a1, <1 x i32>* %p2) nounwind {
 ; CHECK-LABEL: uaddo_v1i32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    addl %esi, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    movl %edi, (%rdx)
@@ -1139,14 +1140,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; SSE2-LABEL: uaddo_v2i128:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; SSE2-NEXT:    xorl %r11d, %r11d
 ; SSE2-NEXT:    addq {{[0-9]+}}(%rsp), %rdx
 ; SSE2-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx
+; SSE2-NEXT:    movl $0, %eax
 ; SSE2-NEXT:    sbbl %eax, %eax
 ; SSE2-NEXT:    addq %r8, %rdi
 ; SSE2-NEXT:    adcq %r9, %rsi
 ; SSE2-NEXT:    movd %eax, %xmm1
-; SSE2-NEXT:    sbbl %eax, %eax
-; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    sbbl %r11d, %r11d
+; SSE2-NEXT:    movd %r11d, %xmm0
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movq %rdx, 16(%r10)
 ; SSE2-NEXT:    movq %rdi, (%r10)
@@ -1157,14 +1160,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; SSSE3-LABEL: uaddo_v2i128:
 ; SSSE3:       # %bb.0:
 ; SSSE3-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; SSSE3-NEXT:    xorl %r11d, %r11d
 ; SSSE3-NEXT:    addq {{[0-9]+}}(%rsp), %rdx
 ; SSSE3-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx
+; SSSE3-NEXT:    movl $0, %eax
 ; SSSE3-NEXT:    sbbl %eax, %eax
 ; SSSE3-NEXT:    addq %r8, %rdi
 ; SSSE3-NEXT:    adcq %r9, %rsi
 ; SSSE3-NEXT:    movd %eax, %xmm1
-; SSSE3-NEXT:    sbbl %eax, %eax
-; SSSE3-NEXT:    movd %eax, %xmm0
+; SSSE3-NEXT:    sbbl %r11d, %r11d
+; SSSE3-NEXT:    movd %r11d, %xmm0
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSSE3-NEXT:    movq %rdx, 16(%r10)
 ; SSSE3-NEXT:    movq %rdi, (%r10)
@@ -1175,14 +1180,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; SSE41-LABEL: uaddo_v2i128:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; SSE41-NEXT:    xorl %r11d, %r11d
 ; SSE41-NEXT:    addq {{[0-9]+}}(%rsp), %rdx
 ; SSE41-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx
-; SSE41-NEXT:    sbbl %r11d, %r11d
+; SSE41-NEXT:    movl $0, %eax
+; SSE41-NEXT:    sbbl %eax, %eax
 ; SSE41-NEXT:    addq %r8, %rdi
 ; SSE41-NEXT:    adcq %r9, %rsi
-; SSE41-NEXT:    sbbl %eax, %eax
-; SSE41-NEXT:    movd %eax, %xmm0
-; SSE41-NEXT:    pinsrd $1, %r11d, %xmm0
+; SSE41-NEXT:    sbbl %r11d, %r11d
+; SSE41-NEXT:    movd %r11d, %xmm0
+; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
 ; SSE41-NEXT:    movq %rdx, 16(%r10)
 ; SSE41-NEXT:    movq %rdi, (%r10)
 ; SSE41-NEXT:    movq %rcx, 24(%r10)
@@ -1192,14 +1199,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX-LABEL: uaddo_v2i128:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; AVX-NEXT:    xorl %r11d, %r11d
 ; AVX-NEXT:    addq {{[0-9]+}}(%rsp), %rdx
 ; AVX-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT:    sbbl %r11d, %r11d
+; AVX-NEXT:    movl $0, %eax
+; AVX-NEXT:    sbbl %eax, %eax
 ; AVX-NEXT:    addq %r8, %rdi
 ; AVX-NEXT:    adcq %r9, %rsi
-; AVX-NEXT:    sbbl %eax, %eax
-; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpinsrd $1, %r11d, %xmm0, %xmm0
+; AVX-NEXT:    sbbl %r11d, %r11d
+; AVX-NEXT:    vmovd %r11d, %xmm0
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX-NEXT:    movq %rdx, 16(%r10)
 ; AVX-NEXT:    movq %rdi, (%r10)
 ; AVX-NEXT:    movq %rcx, 24(%r10)

diff  --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index be9f4ba948202..bb7dc729e115f 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -26,6 +26,7 @@ declare {<2 x i128>, <2 x i1>} @llvm.usub.with.overflow.v2i128(<2 x i128>, <2 x
 define <1 x i32> @usubo_v1i32(<1 x i32> %a0, <1 x i32> %a1, <1 x i32>* %p2) nounwind {
 ; CHECK-LABEL: usubo_v1i32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    subl %esi, %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
 ; CHECK-NEXT:    movl %edi, (%rdx)
@@ -1186,14 +1187,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; SSE2-LABEL: usubo_v2i128:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; SSE2-NEXT:    xorl %r11d, %r11d
 ; SSE2-NEXT:    subq {{[0-9]+}}(%rsp), %rdx
 ; SSE2-NEXT:    sbbq {{[0-9]+}}(%rsp), %rcx
+; SSE2-NEXT:    movl $0, %eax
 ; SSE2-NEXT:    sbbl %eax, %eax
 ; SSE2-NEXT:    subq %r8, %rdi
 ; SSE2-NEXT:    sbbq %r9, %rsi
 ; SSE2-NEXT:    movd %eax, %xmm1
-; SSE2-NEXT:    sbbl %eax, %eax
-; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    sbbl %r11d, %r11d
+; SSE2-NEXT:    movd %r11d, %xmm0
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movq %rdx, 16(%r10)
 ; SSE2-NEXT:    movq %rdi, (%r10)
@@ -1204,14 +1207,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; SSSE3-LABEL: usubo_v2i128:
 ; SSSE3:       # %bb.0:
 ; SSSE3-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; SSSE3-NEXT:    xorl %r11d, %r11d
 ; SSSE3-NEXT:    subq {{[0-9]+}}(%rsp), %rdx
 ; SSSE3-NEXT:    sbbq {{[0-9]+}}(%rsp), %rcx
+; SSSE3-NEXT:    movl $0, %eax
 ; SSSE3-NEXT:    sbbl %eax, %eax
 ; SSSE3-NEXT:    subq %r8, %rdi
 ; SSSE3-NEXT:    sbbq %r9, %rsi
 ; SSSE3-NEXT:    movd %eax, %xmm1
-; SSSE3-NEXT:    sbbl %eax, %eax
-; SSSE3-NEXT:    movd %eax, %xmm0
+; SSSE3-NEXT:    sbbl %r11d, %r11d
+; SSSE3-NEXT:    movd %r11d, %xmm0
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSSE3-NEXT:    movq %rdx, 16(%r10)
 ; SSSE3-NEXT:    movq %rdi, (%r10)
@@ -1222,14 +1227,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; SSE41-LABEL: usubo_v2i128:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; SSE41-NEXT:    xorl %r11d, %r11d
 ; SSE41-NEXT:    subq {{[0-9]+}}(%rsp), %rdx
 ; SSE41-NEXT:    sbbq {{[0-9]+}}(%rsp), %rcx
-; SSE41-NEXT:    sbbl %r11d, %r11d
+; SSE41-NEXT:    movl $0, %eax
+; SSE41-NEXT:    sbbl %eax, %eax
 ; SSE41-NEXT:    subq %r8, %rdi
 ; SSE41-NEXT:    sbbq %r9, %rsi
-; SSE41-NEXT:    sbbl %eax, %eax
-; SSE41-NEXT:    movd %eax, %xmm0
-; SSE41-NEXT:    pinsrd $1, %r11d, %xmm0
+; SSE41-NEXT:    sbbl %r11d, %r11d
+; SSE41-NEXT:    movd %r11d, %xmm0
+; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
 ; SSE41-NEXT:    movq %rdx, 16(%r10)
 ; SSE41-NEXT:    movq %rdi, (%r10)
 ; SSE41-NEXT:    movq %rcx, 24(%r10)
@@ -1239,14 +1246,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX-LABEL: usubo_v2i128:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; AVX-NEXT:    xorl %r11d, %r11d
 ; AVX-NEXT:    subq {{[0-9]+}}(%rsp), %rdx
 ; AVX-NEXT:    sbbq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT:    sbbl %r11d, %r11d
+; AVX-NEXT:    movl $0, %eax
+; AVX-NEXT:    sbbl %eax, %eax
 ; AVX-NEXT:    subq %r8, %rdi
 ; AVX-NEXT:    sbbq %r9, %rsi
-; AVX-NEXT:    sbbl %eax, %eax
-; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpinsrd $1, %r11d, %xmm0, %xmm0
+; AVX-NEXT:    sbbl %r11d, %r11d
+; AVX-NEXT:    vmovd %r11d, %xmm0
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX-NEXT:    movq %rdx, 16(%r10)
 ; AVX-NEXT:    movq %rdi, (%r10)
 ; AVX-NEXT:    movq %rcx, 24(%r10)

diff  --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index e41ed53a5edbf..5a5efa7735da9 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -8,24 +8,27 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
 ; SSE-LABEL: test_v2f64_sext:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cmpltpd %xmm0, %xmm1
-; SSE-NEXT:    movmskpd %xmm1, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskpd %xmm1, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbq %rax, %rax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2f64_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbq %rax, %rax
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v2f64_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vmovmskpd %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbq %rax, %rax
 ; AVX512-NEXT:    retq
   %c = fcmp ogt <2 x double> %a0, %a1
@@ -42,16 +45,18 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
 ; SSE-NEXT:    cmpltpd %xmm1, %xmm3
 ; SSE-NEXT:    cmpltpd %xmm0, %xmm2
 ; SSE-NEXT:    orpd %xmm3, %xmm2
-; SSE-NEXT:    movmskpd %xmm2, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskpd %xmm2, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbq %rax, %rax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v4f64_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vmovmskpd %ymm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vmovmskpd %ymm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbq %rax, %rax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
@@ -59,8 +64,9 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
 ; AVX512-LABEL: test_v4f64_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT:    vmovmskpd %ymm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskpd %ymm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbq %rax, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -80,8 +86,9 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
 ; SSE-NEXT:    cmpltpd %xmm1, %xmm3
 ; SSE-NEXT:    cmpltpd %xmm0, %xmm2
 ; SSE-NEXT:    packssdw %xmm3, %xmm2
-; SSE-NEXT:    movmskps %xmm2, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskps %xmm2, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbq %rax, %rax
 ; SSE-NEXT:    retq
 ;
@@ -90,8 +97,9 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
 ; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vmovmskps %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbq %rax, %rax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
@@ -101,8 +109,9 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
 ; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    vmovmskps %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskps %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbq %rax, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -121,24 +130,27 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
 ; SSE-LABEL: test_v4f32_sext:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cmpltps %xmm0, %xmm1
-; SSE-NEXT:    movmskps %xmm1, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskps %xmm1, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v4f32_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vmovmskps %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbl %eax, %eax
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v4f32_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vmovmskps %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskps %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    retq
   %c = fcmp ogt <4 x float> %a0, %a1
@@ -157,16 +169,18 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
 ; SSE-NEXT:    cmpltps %xmm1, %xmm3
 ; SSE-NEXT:    cmpltps %xmm0, %xmm2
 ; SSE-NEXT:    orps %xmm3, %xmm2
-; SSE-NEXT:    movmskps %xmm2, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskps %xmm2, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v8f32_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vmovmskps %ymm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vmovmskps %ymm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbl %eax, %eax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
@@ -174,8 +188,9 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
 ; AVX512-LABEL: test_v8f32_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX512-NEXT:    vmovmskps %ymm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskps %ymm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -197,8 +212,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
 ; SSE-NEXT:    cmpltps %xmm1, %xmm3
 ; SSE-NEXT:    cmpltps %xmm0, %xmm2
 ; SSE-NEXT:    packssdw %xmm3, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    pmovmskb %xmm2, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    retq
 ;
@@ -207,8 +223,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
 ; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbl %eax, %eax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
@@ -217,8 +234,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k0
 ; AVX512-NEXT:    vpmovm2w %k0, %xmm0
-; AVX512-NEXT:    vpmovmskb %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -239,24 +257,27 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
 ; SSE-LABEL: test_v2i64_sext:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
-; SSE-NEXT:    movmskpd %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskpd %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbq %rax, %rax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i64_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbq %rax, %rax
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v2i64_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovmskpd %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbq %rax, %rax
 ; AVX512-NEXT:    retq
   %c = icmp sgt <2 x i64> %a0, %a1
@@ -273,8 +294,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
 ; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    movmskpd %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskpd %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbq %rax, %rax
 ; SSE-NEXT:    retq
 ;
@@ -285,8 +307,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1-NEXT:    negl %eax
+; AVX1-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    negl %ecx
 ; AVX1-NEXT:    sbbq %rax, %rax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -294,8 +317,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX2-LABEL: test_v4i64_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskpd %ymm0, %eax
-; AVX2-NEXT:    negl %eax
+; AVX2-NEXT:    vmovmskpd %ymm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    negl %ecx
 ; AVX2-NEXT:    sbbq %rax, %rax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -303,8 +327,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX512-LABEL: test_v4i64_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vmovmskpd %ymm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskpd %ymm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbq %rax, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -324,8 +349,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
 ; SSE-NEXT:    packssdw %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbq %rax, %rax
 ; SSE-NEXT:    retq
 ;
@@ -336,8 +362,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
-; AVX1-NEXT:    negl %eax
+; AVX1-NEXT:    vmovmskps %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    negl %ecx
 ; AVX1-NEXT:    sbbq %rax, %rax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -347,8 +374,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovmskps %xmm0, %eax
-; AVX2-NEXT:    negl %eax
+; AVX2-NEXT:    vmovmskps %xmm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    negl %ecx
 ; AVX2-NEXT:    sbbq %rax, %rax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -358,8 +386,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    vmovmskps %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskps %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbq %rax, %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -378,24 +407,27 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: test_v4i32_sext:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v4i32_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vmovmskps %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbl %eax, %eax
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v4i32_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovmskps %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskps %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    retq
   %c = icmp sgt <4 x i32> %a0, %a1
@@ -414,8 +446,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
 ; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    movmskps %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    retq
 ;
@@ -426,8 +459,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
-; AVX1-NEXT:    negl %eax
+; AVX1-NEXT:    vmovmskps %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    negl %ecx
 ; AVX1-NEXT:    sbbl %eax, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -435,8 +469,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX2-LABEL: test_v8i32_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    negl %eax
+; AVX2-NEXT:    vmovmskps %ymm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    negl %ecx
 ; AVX2-NEXT:    sbbl %eax, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -444,8 +479,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX512-LABEL: test_v8i32_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vmovmskps %ymm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vmovmskps %ymm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -467,8 +503,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
 ; SSE-NEXT:    packssdw %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    retq
 ;
@@ -479,8 +516,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    negl %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    negl %ecx
 ; AVX1-NEXT:    sbbl %eax, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -490,8 +528,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
-; AVX2-NEXT:    negl %eax
+; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    negl %ecx
 ; AVX2-NEXT:    sbbl %eax, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -500,8 +539,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
 ; AVX512-NEXT:    vpmovm2w %k0, %xmm0
-; AVX512-NEXT:    vpmovmskb %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -522,8 +562,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
 ; SSE-LABEL: test_v8i16_sext:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE-NEXT:    retq
@@ -531,8 +572,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
 ; AVX-LABEL: test_v8i16_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbl %eax, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
@@ -540,8 +582,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
 ; AVX512-LABEL: test_v8i16_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovmskb %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    retq
@@ -563,8 +606,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
 ; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
 ; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE-NEXT:    retq
@@ -576,8 +620,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
 ; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    negl %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    negl %ecx
 ; AVX1-NEXT:    sbbl %eax, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -586,8 +631,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
 ; AVX2-LABEL: test_v16i16_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    negl %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    negl %ecx
 ; AVX2-NEXT:    sbbl %eax, %eax
 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX2-NEXT:    vzeroupper
@@ -596,8 +642,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
 ; AVX512-LABEL: test_v16i16_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpmovmskb %ymm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
@@ -622,8 +669,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
 ; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
 ; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE-NEXT:    retq
@@ -635,8 +683,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
 ; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    negl %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    negl %ecx
 ; AVX1-NEXT:    sbbl %eax, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -645,8 +694,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
 ; AVX2-LABEL: test_v16i16_legal_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    negl %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    negl %ecx
 ; AVX2-NEXT:    sbbl %eax, %eax
 ; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX2-NEXT:    vzeroupper
@@ -656,8 +706,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0
 ; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    vpmovmskb %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512-NEXT:    vzeroupper
@@ -681,8 +732,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
 ; SSE-LABEL: test_v16i8_sext:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE-NEXT:    retq
@@ -690,8 +742,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
 ; AVX-LABEL: test_v16i8_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    negl %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    negl %ecx
 ; AVX-NEXT:    sbbl %eax, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
@@ -699,8 +752,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
 ; AVX512-LABEL: test_v16i8_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovmskb %xmm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    retq
@@ -724,8 +778,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
 ; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
 ; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
 ; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    negl %eax
+; SSE-NEXT:    pmovmskb %xmm0, %ecx
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    negl %ecx
 ; SSE-NEXT:    sbbl %eax, %eax
 ; SSE-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE-NEXT:    retq
@@ -737,8 +792,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    negl %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    xorl %eax, %eax
+; AVX1-NEXT:    negl %ecx
 ; AVX1-NEXT:    sbbl %eax, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -747,8 +803,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX2-LABEL: test_v32i8_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    negl %eax
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    negl %ecx
 ; AVX2-NEXT:    sbbl %eax, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
@@ -757,8 +814,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX512-LABEL: test_v32i8_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpmovmskb %ymm0, %eax
-; AVX512-NEXT:    negl %eax
+; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    negl %ecx
 ; AVX512-NEXT:    sbbl %eax, %eax
 ; AVX512-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX512-NEXT:    vzeroupper


        


More information about the llvm-commits mailing list