[llvm] 40a50f8 - [x86] avoid false dependency stall on 'sbb' with same source reg
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 7 07:13:00 PST 2022
Author: Sanjay Patel
Date: 2022-02-07T10:12:12-05:00
New Revision: 40a50f8701a99a063a9950fc0a41f46934e4e160
URL: https://github.com/llvm/llvm-project/commit/40a50f8701a99a063a9950fc0a41f46934e4e160
DIFF: https://github.com/llvm/llvm-project/commit/40a50f8701a99a063a9950fc0a41f46934e4e160.diff
LOG: [x86] avoid false dependency stall on 'sbb' with same source reg
This is effectively inverting the transform added with D116804
because the downside of the false dependency of something like
"sbb %eax, %eax" is much greater than the upside of eliminating
a zeroing instruction on (all?) Intel CPUs.
Differential Revision: https://reviews.llvm.org/D118843
Added:
Modified:
llvm/lib/Target/X86/X86.td
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/lib/Target/X86/X86Subtarget.h
llvm/test/CodeGen/X86/combine-movmsk-avx.ll
llvm/test/CodeGen/X86/copy-eflags.ll
llvm/test/CodeGen/X86/jump_sign.ll
llvm/test/CodeGen/X86/machine-cse.ll
llvm/test/CodeGen/X86/pr32588.ll
llvm/test/CodeGen/X86/pr35972.ll
llvm/test/CodeGen/X86/sbb-false-dep.ll
llvm/test/CodeGen/X86/sbb-zero-idiom.ll
llvm/test/CodeGen/X86/sbb.ll
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/select.ll
llvm/test/CodeGen/X86/sext-i1.ll
llvm/test/CodeGen/X86/shl-crash-on-legalize.ll
llvm/test/CodeGen/X86/umul_fix_sat.ll
llvm/test/CodeGen/X86/vec_uaddo.ll
llvm/test/CodeGen/X86/vec_usubo.ll
llvm/test/CodeGen/X86/vector-compare-any_of.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 8e87481f6a950..2a23e99715f09 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -445,6 +445,10 @@ def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
+def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
+ "HasSBBDepBreaking", "true",
+ "SBB with same register has no source dependency">;
+
// On recent X86 (port bound) processors, its preferable to combine to a single shuffle
// using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle
@@ -1032,6 +1036,7 @@ def ProcessorFeatures {
Feature64Bit];
list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
TuningSlowSHLD,
+ TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// Bobcat
@@ -1053,6 +1058,7 @@ def ProcessorFeatures {
TuningFastScalarShiftMasks,
TuningFastVectorShiftMasks,
TuningSlowSHLD,
+ TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// Jaguar
@@ -1072,6 +1078,7 @@ def ProcessorFeatures {
TuningFastScalarShiftMasks,
TuningFastVectorShiftMasks,
TuningFastMOVBE,
+ TuningSBBDepBreaking,
TuningSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1099,6 +1106,7 @@ def ProcessorFeatures {
TuningFast11ByteNOP,
TuningFastScalarShiftMasks,
TuningBranchFusion,
+ TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// PileDriver
@@ -1174,6 +1182,7 @@ def ProcessorFeatures {
TuningFastScalarShiftMasks,
TuningFastMOVBE,
TuningSlowSHLD,
+ TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
@@ -1445,7 +1454,7 @@ foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
[TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
- TuningInsertVZEROUPPER]>;
+ TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
@@ -1453,7 +1462,7 @@ foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
Feature64Bit],
[TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
- TuningInsertVZEROUPPER]>;
+ TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}
foreach P = ["amdfam10", "barcelona"] in {
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0c3cfaa1e61e2..0d697f4fcafde 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -464,8 +464,13 @@ namespace {
}
// Copy flags to the EFLAGS register and glue it to next node.
- SDValue EFLAGS = CurDAG->getCopyToReg(
- CurDAG->getEntryNode(), dl, X86::EFLAGS, N->getOperand(2), SDValue());
+ unsigned Opcode = N->getOpcode();
+ assert(Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY &&
+ "Unexpected opcode for SBB materialization");
+ unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1;
+ SDValue EFLAGS =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
+ N->getOperand(FlagOpIndex), SDValue());
// Create a 64-bit instruction if the result is 64-bits otherwise use the
// 32-bit version.
@@ -5801,21 +5806,28 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
case X86ISD::SETCC_CARRY: {
- // We have to do this manually because tblgen will put the eflags copy in
- // the wrong place if we use an extract_subreg in the pattern.
MVT VT = Node->getSimpleValueType(0);
+ SDValue Result;
+ if (Subtarget->hasSBBDepBreaking()) {
+ // We have to do this manually because tblgen will put the eflags copy in
+ // the wrong place if we use an extract_subreg in the pattern.
+ // Copy flags to the EFLAGS register and glue it to next node.
+ SDValue EFLAGS =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
+ Node->getOperand(1), SDValue());
- // Copy flags to the EFLAGS register and glue it to next node.
- SDValue EFLAGS =
- CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
- Node->getOperand(1), SDValue());
-
- // Create a 64-bit instruction if the result is 64-bits otherwise use the
- // 32-bit version.
- unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
- MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
- SDValue Result = SDValue(
- CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
+ // Create a 64-bit instruction if the result is 64-bits otherwise use the
+ // 32-bit version.
+ unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
+ MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
+ Result = SDValue(
+ CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)),
+ 0);
+ } else {
+ // The target does not recognize sbb with the same reg operand as a
+ // no-source idiom, so we explicitly zero the input values.
+ Result = getSBBZero(Node);
+ }
// For less than 32-bits we need to extract from the 32-bit node.
if (VT == MVT::i8 || VT == MVT::i16) {
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 5d773f0c57dfb..d1ff9445e4790 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -246,6 +246,10 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// True if LZCNT/TZCNT instructions have a false dependency on the destination register.
bool HasLZCNTFalseDeps = false;
+ /// True if an SBB instruction with same source register is recognized as
+ /// having no dependency on that register.
+ bool HasSBBDepBreaking = false;
+
/// True if its preferable to combine to a single cross-lane shuffle
/// using a variable mask over multiple fixed shuffles.
bool HasFastVariableCrossLaneShuffle = false;
@@ -719,6 +723,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool useLeaForSP() const { return UseLeaForSP; }
bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
+ bool hasSBBDepBreaking() const { return HasSBBDepBreaking; }
bool hasFastVariableCrossLaneShuffle() const {
return HasFastVariableCrossLaneShuffle;
}
diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index ca0e8db5db03e..3277c89a3e346 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -139,8 +139,9 @@ define i32 @movmskps_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: movmskps_concat_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vmovmskps %xmm0, %eax
-; CHECK-NEXT: negl %eax
+; CHECK-NEXT: vmovmskps %xmm0, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negl %ecx
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -153,9 +154,10 @@ define i32 @movmskps_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
define i32 @movmskps_demanded_concat_v4f32(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: movmskps_demanded_concat_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovmskps %xmm0, %eax
-; CHECK-NEXT: andl $3, %eax
-; CHECK-NEXT: negl %eax
+; CHECK-NEXT: vmovmskps %xmm0, %ecx
+; CHECK-NEXT: andl $3, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negl %ecx
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index 4d382b24a2a5b..4c17cd52065e4 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -293,6 +293,7 @@ bb1:
define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
; X32-LABEL: PR37431:
; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
@@ -302,10 +303,11 @@ define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl (%edi), %edi
-; X32-NEXT: movl %edi, %ebx
-; X32-NEXT: sarl $31, %ebx
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: sarl $31, %ebp
+; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
-; X32-NEXT: sbbl %ebx, %esi
+; X32-NEXT: sbbl %ebp, %esi
; X32-NEXT: sbbl %ebx, %ebx
; X32-NEXT: movb %bl, (%edx)
; X32-NEXT: cltd
@@ -314,6 +316,7 @@ define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
+; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: PR37431:
@@ -321,6 +324,7 @@ define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movslq (%rdi), %rdx
+; X64-NEXT: xorl %edi, %edi
; X64-NEXT: cmpq %rdx, %r8
; X64-NEXT: sbbl %edi, %edi
; X64-NEXT: movb %dil, (%rsi)
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index 848ebc97a1ac9..7e6b462fcd827 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -310,6 +310,7 @@ define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: func_q:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: sbbl %ecx, %ecx
; CHECK-NEXT: negl %eax
diff --git a/llvm/test/CodeGen/X86/machine-cse.ll b/llvm/test/CodeGen/X86/machine-cse.ll
index e989a782fad7b..b7cd5c913ff13 100644
--- a/llvm/test/CodeGen/X86/machine-cse.ll
+++ b/llvm/test/CodeGen/X86/machine-cse.ll
@@ -112,6 +112,8 @@ define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: ja .LBB2_2
; CHECK-NEXT: # %bb.1: # %if.end
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: .LBB2_2: # %return
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr32588.ll b/llvm/test/CodeGen/X86/pr32588.ll
index 9e6f0b2881d94..8f2e21910cc6d 100644
--- a/llvm/test/CodeGen/X86/pr32588.ll
+++ b/llvm/test/CodeGen/X86/pr32588.ll
@@ -8,6 +8,7 @@
define void @fn1() {
; CHECK-LABEL: fn1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $1, c(%rip)
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index 09363fbc89bba..e7e60666d5bcf 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -5,6 +5,7 @@ define void @test3(i32 %c, <64 x i1>* %ptr) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: sbbl %ecx, %ecx
; CHECK-NEXT: kmovd %ecx, %k0
diff --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll
index 336e56c45969d..204b215a89a5e 100644
--- a/llvm/test/CodeGen/X86/sbb-false-dep.ll
+++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sbb-dep-breaking | FileCheck %s --check-prefixes=IDIOM
%struct.y_s = type { i64*, i64* }
@@ -24,13 +25,15 @@ define i32 @mallocbench_gs(i32* noundef %0, %struct.y_s* noundef %1, i32 noundef
; CHECK-NEXT: callq foo1@PLT
; CHECK-NEXT: movq 8(%rbx), %rax
; CHECK-NEXT: movq (%rax), %rdx
+; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: movl %r13d, %ecx
; CHECK-NEXT: negl %ecx
-; CHECK-NEXT: sbbq %rbp, %rbp
-; CHECK-NEXT: orq %rdx, %rbp
-; CHECK-NEXT: cmpl $1, %r13d
+; CHECK-NEXT: movl $0, %eax
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: orq %rdx, %rax
+; CHECK-NEXT: cmpl $1, %r13d
+; CHECK-NEXT: sbbq %rbp, %rbp
+; CHECK-NEXT: orq %rdx, %rbp
; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movq %r12, %rdi
; CHECK-NEXT: movl %r15d, %esi
@@ -38,8 +41,8 @@ define i32 @mallocbench_gs(i32* noundef %0, %struct.y_s* noundef %1, i32 noundef
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %r8d, %r8d
; CHECK-NEXT: xorl %r9d, %r9d
-; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: callq foo2@PLT
; CHECK-NEXT: addq $40, %rsp
@@ -50,6 +53,53 @@ define i32 @mallocbench_gs(i32* noundef %0, %struct.y_s* noundef %1, i32 noundef
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
+;
+; IDIOM-LABEL: mallocbench_gs:
+; IDIOM: # %bb.0:
+; IDIOM-NEXT: pushq %rbp
+; IDIOM-NEXT: pushq %r15
+; IDIOM-NEXT: pushq %r14
+; IDIOM-NEXT: pushq %r13
+; IDIOM-NEXT: pushq %r12
+; IDIOM-NEXT: pushq %rbx
+; IDIOM-NEXT: pushq %rax
+; IDIOM-NEXT: movl %r8d, %r13d
+; IDIOM-NEXT: movl %ecx, %r14d
+; IDIOM-NEXT: movl %edx, %r15d
+; IDIOM-NEXT: movq %rsi, %rbx
+; IDIOM-NEXT: movq %rdi, %r12
+; IDIOM-NEXT: movq (%rsi), %rdi
+; IDIOM-NEXT: movq 8(%rsi), %rsi
+; IDIOM-NEXT: movq %rbx, %rdx
+; IDIOM-NEXT: callq foo1@PLT
+; IDIOM-NEXT: movq 8(%rbx), %rax
+; IDIOM-NEXT: movq (%rax), %rdx
+; IDIOM-NEXT: movl %r13d, %ecx
+; IDIOM-NEXT: negl %ecx
+; IDIOM-NEXT: sbbq %rbp, %rbp
+; IDIOM-NEXT: orq %rdx, %rbp
+; IDIOM-NEXT: cmpl $1, %r13d
+; IDIOM-NEXT: sbbq %rax, %rax
+; IDIOM-NEXT: orq %rdx, %rax
+; IDIOM-NEXT: subq $8, %rsp
+; IDIOM-NEXT: movq %r12, %rdi
+; IDIOM-NEXT: movl %r15d, %esi
+; IDIOM-NEXT: movl %r14d, %edx
+; IDIOM-NEXT: xorl %ecx, %ecx
+; IDIOM-NEXT: xorl %r8d, %r8d
+; IDIOM-NEXT: xorl %r9d, %r9d
+; IDIOM-NEXT: pushq %rax
+; IDIOM-NEXT: pushq %rbp
+; IDIOM-NEXT: pushq %rbx
+; IDIOM-NEXT: callq foo2@PLT
+; IDIOM-NEXT: addq $40, %rsp
+; IDIOM-NEXT: popq %rbx
+; IDIOM-NEXT: popq %r12
+; IDIOM-NEXT: popq %r13
+; IDIOM-NEXT: popq %r14
+; IDIOM-NEXT: popq %r15
+; IDIOM-NEXT: popq %rbp
+; IDIOM-NEXT: retq
%6 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 0
%7 = load i64*, i64** %6, align 8
%8 = getelementptr inbounds %struct.y_s, %struct.y_s* %1, i64 0, i32 1
diff --git a/llvm/test/CodeGen/X86/sbb-zero-idiom.ll b/llvm/test/CodeGen/X86/sbb-zero-idiom.ll
index 7baa937aedec2..964e91b6f4a86 100644
--- a/llvm/test/CodeGen/X86/sbb-zero-idiom.ll
+++ b/llvm/test/CodeGen/X86/sbb-zero-idiom.ll
@@ -1,18 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=k8 | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK
+
+; Check the attribute.
+
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-sbb-dep-breaking | FileCheck %s --check-prefixes=ZERO
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sbb-dep-breaking | FileCheck %s --check-prefixes=IDIOM
+
+; And check that CPUs have included the attribute as expected.
+
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=ZERO
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=ZERO
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=ZERO
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=k8 | FileCheck %s --check-prefixes=IDIOM
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1 | FileCheck %s --check-prefixes=IDIOM
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=IDIOM
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=IDIOM
define i32 @i32_select_0_or_neg1(i32 %x) {
-; CHECK-LABEL: i32_select_0_or_neg1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: negl %edi
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: retq
+; ZERO-LABEL: i32_select_0_or_neg1:
+; ZERO: # %bb.0:
+; ZERO-NEXT: xorl %eax, %eax
+; ZERO-NEXT: negl %edi
+; ZERO-NEXT: sbbl %eax, %eax
+; ZERO-NEXT: retq
+;
+; IDIOM-LABEL: i32_select_0_or_neg1:
+; IDIOM: # %bb.0:
+; IDIOM-NEXT: negl %edi
+; IDIOM-NEXT: sbbl %eax, %eax
+; IDIOM-NEXT: retq
%cmp = icmp ne i32 %x, 0
%sel = select i1 %cmp, i32 -1, i32 0
ret i32 %sel
diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll
index b3dae629ba808..78d609d3a17e6 100644
--- a/llvm/test/CodeGen/X86/sbb.ll
+++ b/llvm/test/CodeGen/X86/sbb.ll
@@ -8,6 +8,7 @@
define i8 @i8_select_0_or_neg1(i8 %x) {
; CHECK-LABEL: i8_select_0_or_neg1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: negb %dil
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
@@ -22,6 +23,7 @@ define i8 @i8_select_0_or_neg1(i8 %x) {
define i16 @i16_select_0_or_neg1_as_math(i16 %x) {
; CHECK-LABEL: i16_select_0_or_neg1_as_math:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: negw %di
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
@@ -37,6 +39,7 @@ define i16 @i16_select_0_or_neg1_as_math(i16 %x) {
define i32 @i32_select_0_or_neg1_commuted(i32 %x) {
; CHECK-LABEL: i32_select_0_or_neg1_commuted:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: negl %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
@@ -50,6 +53,7 @@ define i32 @i32_select_0_or_neg1_commuted(i32 %x) {
define i64 @i64_select_0_or_neg1_commuted_as_math(i64 %x) {
; CHECK-LABEL: i64_select_0_or_neg1_commuted_as_math:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: negq %rdi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: retq
@@ -64,6 +68,7 @@ define i64 @i64_select_0_or_neg1_commuted_as_math(i64 %x) {
define i64 @i64_select_neg1_or_0(i64 %x) {
; CHECK-LABEL: i64_select_neg1_or_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $1, %rdi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: retq
@@ -77,6 +82,7 @@ define i64 @i64_select_neg1_or_0(i64 %x) {
define i32 @i32_select_neg1_or_0_as_math(i32 %x) {
; CHECK-LABEL: i32_select_neg1_or_0_as_math:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $1, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
@@ -91,6 +97,7 @@ define i32 @i32_select_neg1_or_0_as_math(i32 %x) {
define i16 @i16_select_neg1_or_0_commuted(i16 %x) {
; CHECK-LABEL: i16_select_neg1_or_0_commuted:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpw $1, %di
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
@@ -105,6 +112,7 @@ define i16 @i16_select_neg1_or_0_commuted(i16 %x) {
define i8 @i8_select_neg1_or_0_commuted_as_math(i8 %x) {
; CHECK-LABEL: i8_select_neg1_or_0_commuted_as_math:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $1, %dil
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
@@ -120,6 +128,7 @@ define i8 @i8_select_neg1_or_0_commuted_as_math(i8 %x) {
define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ult_select_neg1_or_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
@@ -134,6 +143,7 @@ define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind {
define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ugt_select_neg1_or_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
@@ -148,6 +158,7 @@ define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind {
define i32 @uge_select_0_or_neg1(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: uge_select_0_or_neg1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
@@ -163,6 +174,7 @@ define i32 @uge_select_0_or_neg1(i32 %x, i32 %y) nounwind {
define i32 @ule_select_0_or_neg1(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ule_select_0_or_neg1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
@@ -178,6 +190,7 @@ define i32 @ule_select_0_or_neg1(i32 %x, i32 %y) nounwind {
define i32 @uge_select_0_or_neg1_sub(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: uge_select_0_or_neg1_sub:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
@@ -193,6 +206,7 @@ define i32 @uge_select_0_or_neg1_sub(i32 %x, i32 %y) nounwind {
define i64 @ugt_select_neg1_or_0_sub(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ugt_select_neg1_or_0_sub:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq %rdi, %rsi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: retq
@@ -208,6 +222,7 @@ define i64 @ugt_select_neg1_or_0_sub(i64 %x, i64 %y) nounwind {
define i16 @ult_select_neg1_or_0_sub(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: ult_select_neg1_or_0_sub:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpw %di, %si
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
@@ -226,6 +241,7 @@ define i16 @ult_select_neg1_or_0_sub(i16 %x, i16 %y) nounwind {
define void @PR33560(i8 %x, i64 %y) {
; CHECK-LABEL: PR33560:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: negb %dil
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: cmpq %rsi, %rax
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 0ba51f02bbb11..9b964b147d553 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -1219,6 +1219,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: andl %eax, %ebx
; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %ecx
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
@@ -1242,6 +1243,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: andl %eax, %edi
; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1268,6 +1270,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: andl %eax, %edx
; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1291,6 +1294,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: andl %eax, %edi
; X86-NEXT: negl %eax
+; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 81229e301d652..a7f41e0813297 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -629,21 +629,13 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
;; Test integer select between values and constants.
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test9:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: cmpq $1, %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq %rsi, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: test9:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: cmpq $1, %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq %rsi, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: test9:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpq $1, %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: test9:
; ATHLON: ## %bb.0:
@@ -677,21 +669,13 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
;; Same as test9
define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test9a:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: cmpq $1, %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq %rsi, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: test9a:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: cmpq $1, %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq %rsi, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: test9a:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpq $1, %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: test9a:
; ATHLON: ## %bb.0:
@@ -723,21 +707,13 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test9b:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: cmpq $1, %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq %rsi, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: test9b:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: cmpq $1, %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq %rsi, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: test9b:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpq $1, %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: test9b:
; ATHLON: ## %bb.0:
@@ -770,21 +746,13 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
;; Select between -1 and 1.
define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test10:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: cmpq $1, %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq $1, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: test10:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: cmpq $1, %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq $1, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: test10:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpq $1, %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq $1, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: test10:
; ATHLON: ## %bb.0:
@@ -814,21 +782,13 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test11:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: negq %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq %rsi, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: test11:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: negq %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq %rsi, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: test11:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negq %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: test11:
; ATHLON: ## %bb.0:
@@ -861,21 +821,13 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test11a:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: negq %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq %rsi, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: test11a:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: negq %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq %rsi, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: test11a:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negq %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: test11a:
; ATHLON: ## %bb.0:
@@ -907,21 +859,13 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i32 @eqzero_const_or_all_ones(i32 %x) {
-; GENERIC-LABEL: eqzero_const_or_all_ones:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: negl %edi
-; GENERIC-NEXT: sbbl %eax, %eax
-; GENERIC-NEXT: orl $42, %eax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: eqzero_const_or_all_ones:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: negl %edi
-; ATOM-NEXT: sbbl %eax, %eax
-; ATOM-NEXT: orl $42, %eax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: eqzero_const_or_all_ones:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: orl $42, %eax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: eqzero_const_or_all_ones:
; ATHLON: ## %bb.0:
@@ -933,9 +877,11 @@ define i32 @eqzero_const_or_all_ones(i32 %x) {
;
; MCU-LABEL: eqzero_const_or_all_ones:
; MCU: # %bb.0:
+; MCU-NEXT: xorl %ecx, %ecx
; MCU-NEXT: negl %eax
-; MCU-NEXT: sbbl %eax, %eax
-; MCU-NEXT: orl $42, %eax
+; MCU-NEXT: sbbl %ecx, %ecx
+; MCU-NEXT: orl $42, %ecx
+; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: retl
%z = icmp eq i32 %x, 0
%r = select i1 %z, i32 42, i32 -1
@@ -943,24 +889,17 @@ define i32 @eqzero_const_or_all_ones(i32 %x) {
}
define i32 @nezero_const_or_all_ones(i32 %x) {
-; GENERIC-LABEL: nezero_const_or_all_ones:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: cmpl $1, %edi
-; GENERIC-NEXT: sbbl %eax, %eax
-; GENERIC-NEXT: orl $42, %eax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: nezero_const_or_all_ones:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: cmpl $1, %edi
-; ATOM-NEXT: sbbl %eax, %eax
-; ATOM-NEXT: orl $42, %eax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: nezero_const_or_all_ones:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl $1, %edi
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: orl $42, %eax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: nezero_const_or_all_ones:
; ATHLON: ## %bb.0:
+; ATHLON-NEXT: xorl %eax, %eax
; ATHLON-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; ATHLON-NEXT: sbbl %eax, %eax
; ATHLON-NEXT: orl $42, %eax
@@ -968,9 +907,11 @@ define i32 @nezero_const_or_all_ones(i32 %x) {
;
; MCU-LABEL: nezero_const_or_all_ones:
; MCU: # %bb.0:
+; MCU-NEXT: xorl %ecx, %ecx
; MCU-NEXT: cmpl $1, %eax
-; MCU-NEXT: sbbl %eax, %eax
-; MCU-NEXT: orl $42, %eax
+; MCU-NEXT: sbbl %ecx, %ecx
+; MCU-NEXT: orl $42, %ecx
+; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: retl
%z = icmp ne i32 %x, 0
%r = select i1 %z, i32 42, i32 -1
@@ -978,21 +919,13 @@ define i32 @nezero_const_or_all_ones(i32 %x) {
}
define i64 @eqzero_all_ones_or_const(i64 %x) {
-; GENERIC-LABEL: eqzero_all_ones_or_const:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: cmpq $1, %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq $42, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: eqzero_all_ones_or_const:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: cmpq $1, %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq $42, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: eqzero_all_ones_or_const:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpq $1, %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq $42, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: eqzero_all_ones_or_const:
; ATHLON: ## %bb.0:
@@ -1022,23 +955,14 @@ define i64 @eqzero_all_ones_or_const(i64 %x) {
}
define i8 @nezero_all_ones_or_const(i8 %x) {
-; GENERIC-LABEL: nezero_all_ones_or_const:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: negb %dil
-; GENERIC-NEXT: sbbl %eax, %eax
-; GENERIC-NEXT: orb $42, %al
-; GENERIC-NEXT: ## kill: def $al killed $al killed $eax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: nezero_all_ones_or_const:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: negb %dil
-; ATOM-NEXT: sbbl %eax, %eax
-; ATOM-NEXT: orb $42, %al
-; ATOM-NEXT: ## kill: def $al killed $al killed $eax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: nezero_all_ones_or_const:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negb %dil
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: orb $42, %al
+; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: nezero_all_ones_or_const:
; ATHLON: ## %bb.0:
@@ -1051,10 +975,11 @@ define i8 @nezero_all_ones_or_const(i8 %x) {
;
; MCU-LABEL: nezero_all_ones_or_const:
; MCU: # %bb.0:
+; MCU-NEXT: xorl %ecx, %ecx
; MCU-NEXT: negb %al
-; MCU-NEXT: sbbl %eax, %eax
-; MCU-NEXT: orb $42, %al
-; MCU-NEXT: # kill: def $al killed $al killed $eax
+; MCU-NEXT: sbbl %ecx, %ecx
+; MCU-NEXT: orb $42, %cl
+; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: retl
%z = icmp ne i8 %x, 0
%r = select i1 %z, i8 -1, i8 42
@@ -1062,21 +987,13 @@ define i8 @nezero_all_ones_or_const(i8 %x) {
}
define i32 @PR53006(i32 %x) {
-; GENERIC-LABEL: PR53006:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: negl %edi
-; GENERIC-NEXT: sbbl %eax, %eax
-; GENERIC-NEXT: orl $1, %eax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: PR53006:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: negl %edi
-; ATOM-NEXT: sbbl %eax, %eax
-; ATOM-NEXT: orl $1, %eax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: PR53006:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: orl $1, %eax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: PR53006:
; ATHLON: ## %bb.0:
@@ -1088,9 +1005,11 @@ define i32 @PR53006(i32 %x) {
;
; MCU-LABEL: PR53006:
; MCU: # %bb.0:
+; MCU-NEXT: xorl %ecx, %ecx
; MCU-NEXT: negl %eax
-; MCU-NEXT: sbbl %eax, %eax
-; MCU-NEXT: orl $1, %eax
+; MCU-NEXT: sbbl %ecx, %ecx
+; MCU-NEXT: orl $1, %ecx
+; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: retl
%z = icmp eq i32 %x, 0
%r = select i1 %z, i32 1, i32 -1
@@ -1100,31 +1019,34 @@ define i32 @PR53006(i32 %x) {
define i32 @test13(i32 %a, i32 %b) nounwind {
; GENERIC-LABEL: test13:
; GENERIC: ## %bb.0:
+; GENERIC-NEXT: xorl %eax, %eax
; GENERIC-NEXT: cmpl %esi, %edi
; GENERIC-NEXT: sbbl %eax, %eax
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test13:
; ATOM: ## %bb.0:
+; ATOM-NEXT: xorl %eax, %eax
; ATOM-NEXT: cmpl %esi, %edi
; ATOM-NEXT: sbbl %eax, %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; ATHLON-LABEL: test13:
; ATHLON: ## %bb.0:
-; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
-; ATHLON-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; ATHLON-NEXT: xorl %eax, %eax
+; ATHLON-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; ATHLON-NEXT: sbbl %eax, %eax
; ATHLON-NEXT: retl
;
; MCU-LABEL: test13:
; MCU: # %bb.0:
+; MCU-NEXT: xorl %ecx, %ecx
; MCU-NEXT: cmpl %edx, %eax
-; MCU-NEXT: sbbl %eax, %eax
+; MCU-NEXT: sbbl %ecx, %ecx
+; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: retl
%c = icmp ult i32 %a, %b
%d = sext i1 %c to i32
@@ -1172,18 +1094,18 @@ define i32 @test14(i32 %a, i32 %b) nounwind {
define i32 @test15(i32 %x) nounwind {
; GENERIC-LABEL: test15:
; GENERIC: ## %bb.0: ## %entry
+; GENERIC-NEXT: xorl %eax, %eax
; GENERIC-NEXT: negl %edi
; GENERIC-NEXT: sbbl %eax, %eax
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test15:
; ATOM: ## %bb.0: ## %entry
+; ATOM-NEXT: xorl %eax, %eax
; ATOM-NEXT: negl %edi
; ATOM-NEXT: sbbl %eax, %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; ATHLON-LABEL: test15:
@@ -1195,8 +1117,10 @@ define i32 @test15(i32 %x) nounwind {
;
; MCU-LABEL: test15:
; MCU: # %bb.0: # %entry
+; MCU-NEXT: xorl %ecx, %ecx
; MCU-NEXT: negl %eax
-; MCU-NEXT: sbbl %eax, %eax
+; MCU-NEXT: sbbl %ecx, %ecx
+; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: retl
entry:
%cmp = icmp ne i32 %x, 0
@@ -1207,18 +1131,18 @@ entry:
define i64 @test16(i64 %x) nounwind uwtable readnone ssp {
; GENERIC-LABEL: test16:
; GENERIC: ## %bb.0: ## %entry
+; GENERIC-NEXT: xorl %eax, %eax
; GENERIC-NEXT: negq %rdi
; GENERIC-NEXT: sbbq %rax, %rax
; GENERIC-NEXT: retq
;
; ATOM-LABEL: test16:
; ATOM: ## %bb.0: ## %entry
+; ATOM-NEXT: xorl %eax, %eax
; ATOM-NEXT: negq %rdi
; ATOM-NEXT: sbbq %rax, %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; ATHLON-LABEL: test16:
@@ -1249,6 +1173,7 @@ entry:
define i16 @test17(i16 %x) nounwind {
; GENERIC-LABEL: test17:
; GENERIC: ## %bb.0: ## %entry
+; GENERIC-NEXT: xorl %eax, %eax
; GENERIC-NEXT: negw %di
; GENERIC-NEXT: sbbl %eax, %eax
; GENERIC-NEXT: ## kill: def $ax killed $ax killed $eax
@@ -1256,13 +1181,12 @@ define i16 @test17(i16 %x) nounwind {
;
; ATOM-LABEL: test17:
; ATOM: ## %bb.0: ## %entry
+; ATOM-NEXT: xorl %eax, %eax
; ATOM-NEXT: negw %di
; ATOM-NEXT: sbbl %eax, %eax
; ATOM-NEXT: ## kill: def $ax killed $ax killed $eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; ATHLON-LABEL: test17:
@@ -1275,9 +1199,10 @@ define i16 @test17(i16 %x) nounwind {
;
; MCU-LABEL: test17:
; MCU: # %bb.0: # %entry
+; MCU-NEXT: xorl %ecx, %ecx
; MCU-NEXT: negw %ax
-; MCU-NEXT: sbbl %eax, %eax
-; MCU-NEXT: # kill: def $ax killed $ax killed $eax
+; MCU-NEXT: sbbl %ecx, %ecx
+; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: retl
entry:
%cmp = icmp ne i16 %x, 0
diff --git a/llvm/test/CodeGen/X86/sext-i1.ll b/llvm/test/CodeGen/X86/sext-i1.ll
index acf9e8138a0c0..03799af1e70a4 100644
--- a/llvm/test/CodeGen/X86/sext-i1.ll
+++ b/llvm/test/CodeGen/X86/sext-i1.ll
@@ -8,12 +8,14 @@
define i32 @t1(i32 %x) nounwind readnone ssp {
; X32-LABEL: t1:
; X32: # %bb.0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: retq
@@ -25,12 +27,14 @@ define i32 @t1(i32 %x) nounwind readnone ssp {
define i32 @t2(i32 %x) nounwind readnone ssp {
; X32-LABEL: t2:
; X32: # %bb.0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: retq
@@ -43,6 +47,7 @@ define i32 @t3(i32 %x, i64 %y) nounwind readonly {
; X32-LABEL: t3:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %ecx, %ecx
; X32-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
@@ -87,6 +92,7 @@ define i32 @t4(i64 %x) nounwind readnone ssp {
;
; X64-LABEL: t4:
; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq $1, %rdi
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: retq
@@ -98,6 +104,7 @@ define i32 @t4(i64 %x) nounwind readnone ssp {
define i64 @t5(i32 %x) nounwind readnone ssp {
; X32-LABEL: t5:
; X32: # %bb.0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: movl %eax, %edx
@@ -105,6 +112,7 @@ define i64 @t5(i32 %x) nounwind readnone ssp {
;
; X64-LABEL: t5:
; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sbbq %rax, %rax
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll b/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll
index 66ce60a9b22c5..0168fd90a95de 100644
--- a/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll
+++ b/llvm/test/CodeGen/X86/shl-crash-on-legalize.ll
@@ -14,6 +14,7 @@ define i32 @PR29058(i8 %x, i32 %y) {
; CHECK-NEXT: testb %dil, %dil
; CHECK-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE
; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpb $1, %dil
; CHECK-NEXT: sbbl %ecx, %ecx
; CHECK-NEXT: orb %sil, %cl
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index cdb5480c7f614..504557242c305 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -443,29 +443,30 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: addl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: addl %edx, %edi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: addl %esi, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: addl %edi, %eax
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %esi
; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: negl %ebx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: negl %esi
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: orl %ecx, %edx
@@ -521,11 +522,12 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
; X86-NEXT: shrdl $31, %edx, %eax
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shrl $31, %esi
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: negl %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %esi, %eax
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: shrdl $31, %ecx, %edx
-; X86-NEXT: orl %esi, %edx
+; X86-NEXT: orl %edi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 73d71135a9332..4a9bfd3b6f0e2 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -26,6 +26,7 @@ declare {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128>, <2 x
define <1 x i32> @uaddo_v1i32(<1 x i32> %a0, <1 x i32> %a1, <1 x i32>* %p2) nounwind {
; CHECK-LABEL: uaddo_v1i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: addl %esi, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: movl %edi, (%rdx)
@@ -1139,14 +1140,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; SSE2-LABEL: uaddo_v2i128:
; SSE2: # %bb.0:
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE2-NEXT: xorl %r11d, %r11d
; SSE2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; SSE2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; SSE2-NEXT: movl $0, %eax
; SSE2-NEXT: sbbl %eax, %eax
; SSE2-NEXT: addq %r8, %rdi
; SSE2-NEXT: adcq %r9, %rsi
; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: sbbl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: sbbl %r11d, %r11d
+; SSE2-NEXT: movd %r11d, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movq %rdx, 16(%r10)
; SSE2-NEXT: movq %rdi, (%r10)
@@ -1157,14 +1160,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; SSSE3-LABEL: uaddo_v2i128:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSSE3-NEXT: xorl %r11d, %r11d
; SSSE3-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; SSSE3-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
+; SSSE3-NEXT: movl $0, %eax
; SSSE3-NEXT: sbbl %eax, %eax
; SSSE3-NEXT: addq %r8, %rdi
; SSSE3-NEXT: adcq %r9, %rsi
; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: sbbl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: sbbl %r11d, %r11d
+; SSSE3-NEXT: movd %r11d, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movq %rdx, 16(%r10)
; SSSE3-NEXT: movq %rdi, (%r10)
@@ -1175,14 +1180,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; SSE41-LABEL: uaddo_v2i128:
; SSE41: # %bb.0:
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE41-NEXT: xorl %r11d, %r11d
; SSE41-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; SSE41-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
-; SSE41-NEXT: sbbl %r11d, %r11d
+; SSE41-NEXT: movl $0, %eax
+; SSE41-NEXT: sbbl %eax, %eax
; SSE41-NEXT: addq %r8, %rdi
; SSE41-NEXT: adcq %r9, %rsi
-; SSE41-NEXT: sbbl %eax, %eax
-; SSE41-NEXT: movd %eax, %xmm0
-; SSE41-NEXT: pinsrd $1, %r11d, %xmm0
+; SSE41-NEXT: sbbl %r11d, %r11d
+; SSE41-NEXT: movd %r11d, %xmm0
+; SSE41-NEXT: pinsrd $1, %eax, %xmm0
; SSE41-NEXT: movq %rdx, 16(%r10)
; SSE41-NEXT: movq %rdi, (%r10)
; SSE41-NEXT: movq %rcx, 24(%r10)
@@ -1192,14 +1199,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX-LABEL: uaddo_v2i128:
; AVX: # %bb.0:
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX-NEXT: xorl %r11d, %r11d
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT: sbbl %r11d, %r11d
+; AVX-NEXT: movl $0, %eax
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: addq %r8, %rdi
; AVX-NEXT: adcq %r9, %rsi
-; AVX-NEXT: sbbl %eax, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpinsrd $1, %r11d, %xmm0, %xmm0
+; AVX-NEXT: sbbl %r11d, %r11d
+; AVX-NEXT: vmovd %r11d, %xmm0
+; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT: movq %rdx, 16(%r10)
; AVX-NEXT: movq %rdi, (%r10)
; AVX-NEXT: movq %rcx, 24(%r10)
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index be9f4ba948202..bb7dc729e115f 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -26,6 +26,7 @@ declare {<2 x i128>, <2 x i1>} @llvm.usub.with.overflow.v2i128(<2 x i128>, <2 x
define <1 x i32> @usubo_v1i32(<1 x i32> %a0, <1 x i32> %a1, <1 x i32>* %p2) nounwind {
; CHECK-LABEL: usubo_v1i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: subl %esi, %edi
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: movl %edi, (%rdx)
@@ -1186,14 +1187,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; SSE2-LABEL: usubo_v2i128:
; SSE2: # %bb.0:
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE2-NEXT: xorl %r11d, %r11d
; SSE2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; SSE2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; SSE2-NEXT: movl $0, %eax
; SSE2-NEXT: sbbl %eax, %eax
; SSE2-NEXT: subq %r8, %rdi
; SSE2-NEXT: sbbq %r9, %rsi
; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: sbbl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: sbbl %r11d, %r11d
+; SSE2-NEXT: movd %r11d, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movq %rdx, 16(%r10)
; SSE2-NEXT: movq %rdi, (%r10)
@@ -1204,14 +1207,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; SSSE3-LABEL: usubo_v2i128:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSSE3-NEXT: xorl %r11d, %r11d
; SSSE3-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; SSSE3-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
+; SSSE3-NEXT: movl $0, %eax
; SSSE3-NEXT: sbbl %eax, %eax
; SSSE3-NEXT: subq %r8, %rdi
; SSSE3-NEXT: sbbq %r9, %rsi
; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: sbbl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: sbbl %r11d, %r11d
+; SSSE3-NEXT: movd %r11d, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movq %rdx, 16(%r10)
; SSSE3-NEXT: movq %rdi, (%r10)
@@ -1222,14 +1227,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; SSE41-LABEL: usubo_v2i128:
; SSE41: # %bb.0:
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; SSE41-NEXT: xorl %r11d, %r11d
; SSE41-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; SSE41-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
-; SSE41-NEXT: sbbl %r11d, %r11d
+; SSE41-NEXT: movl $0, %eax
+; SSE41-NEXT: sbbl %eax, %eax
; SSE41-NEXT: subq %r8, %rdi
; SSE41-NEXT: sbbq %r9, %rsi
-; SSE41-NEXT: sbbl %eax, %eax
-; SSE41-NEXT: movd %eax, %xmm0
-; SSE41-NEXT: pinsrd $1, %r11d, %xmm0
+; SSE41-NEXT: sbbl %r11d, %r11d
+; SSE41-NEXT: movd %r11d, %xmm0
+; SSE41-NEXT: pinsrd $1, %eax, %xmm0
; SSE41-NEXT: movq %rdx, 16(%r10)
; SSE41-NEXT: movq %rdi, (%r10)
; SSE41-NEXT: movq %rcx, 24(%r10)
@@ -1239,14 +1246,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX-LABEL: usubo_v2i128:
; AVX: # %bb.0:
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX-NEXT: xorl %r11d, %r11d
; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT: sbbl %r11d, %r11d
+; AVX-NEXT: movl $0, %eax
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: subq %r8, %rdi
; AVX-NEXT: sbbq %r9, %rsi
-; AVX-NEXT: sbbl %eax, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpinsrd $1, %r11d, %xmm0, %xmm0
+; AVX-NEXT: sbbl %r11d, %r11d
+; AVX-NEXT: vmovd %r11d, %xmm0
+; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT: movq %rdx, 16(%r10)
; AVX-NEXT: movq %rdi, (%r10)
; AVX-NEXT: movq %rcx, 24(%r10)
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index e41ed53a5edbf..5a5efa7735da9 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -8,24 +8,27 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64_sext:
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm0, %xmm1
-; SSE-NEXT: movmskpd %xmm1, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskpd %xmm1, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbq %rax, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovmskpd %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbq %rax, %rax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vmovmskpd %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskpd %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: retq
%c = fcmp ogt <2 x double> %a0, %a1
@@ -42,16 +45,18 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
; SSE-NEXT: cmpltpd %xmm1, %xmm3
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: orpd %xmm3, %xmm2
-; SSE-NEXT: movmskpd %xmm2, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskpd %xmm2, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbq %rax, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovmskpd %ymm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskpd %ymm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbq %rax, %rax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -59,8 +64,9 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vmovmskpd %ymm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskpd %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -80,8 +86,9 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
; SSE-NEXT: cmpltpd %xmm1, %xmm3
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: movmskps %xmm2, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm2, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbq %rax, %rax
; SSE-NEXT: retq
;
@@ -90,8 +97,9 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbq %rax, %rax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -101,8 +109,9 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vmovmskps %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -121,24 +130,27 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_v4f32_sext:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm0, %xmm1
-; SSE-NEXT: movmskps %xmm1, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm1, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vmovmskps %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: retq
%c = fcmp ogt <4 x float> %a0, %a1
@@ -157,16 +169,18 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
-; SSE-NEXT: movmskps %xmm2, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm2, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovmskps %ymm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskps %ymm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -174,8 +188,9 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vmovmskps %ymm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskps %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -197,8 +212,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm2, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@@ -207,8 +223,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -217,8 +234,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpmovmskb %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -239,24 +257,27 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_v2i64_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtq %xmm1, %xmm0
-; SSE-NEXT: movmskpd %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskpd %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbq %rax, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i64_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovmskpd %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbq %rax, %rax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovmskpd %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskpd %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: retq
%c = icmp sgt <2 x i64> %a0, %a1
@@ -273,8 +294,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-NEXT: pcmpgtq %xmm3, %xmm1
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movmskpd %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskpd %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbq %rax, %rax
; SSE-NEXT: retq
;
@@ -285,8 +307,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskpd %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vmovmskpd %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: negl %ecx
; AVX1-NEXT: sbbq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -294,8 +317,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-LABEL: test_v4i64_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskpd %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vmovmskpd %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: negl %ecx
; AVX2-NEXT: sbbq %rax, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -303,8 +327,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vmovmskpd %ymm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskpd %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -324,8 +349,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-NEXT: pcmpgtq %xmm3, %xmm1
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbq %rax, %rax
; SSE-NEXT: retq
;
@@ -336,8 +362,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vmovmskps %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: negl %ecx
; AVX1-NEXT: sbbq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -347,8 +374,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovmskps %xmm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vmovmskps %xmm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: negl %ecx
; AVX2-NEXT: sbbq %rax, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -358,8 +386,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vmovmskps %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -378,24 +407,27 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_v4i32_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovmskps %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a0, %a1
@@ -414,8 +446,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@@ -426,8 +459,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vmovmskps %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: negl %ecx
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -435,8 +469,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_v8i32_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vmovmskps %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: negl %ecx
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -444,8 +479,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vmovmskps %ymm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vmovmskps %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -467,8 +503,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@@ -479,8 +516,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: negl %ecx
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -490,8 +528,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpmovmskb %xmm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: negl %ecx
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -500,8 +539,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpmovmskb %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -522,8 +562,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_v8i16_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@@ -531,8 +572,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
; AVX-LABEL: test_v8i16_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@@ -540,8 +582,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovmskb %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
@@ -563,8 +606,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@@ -576,8 +620,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: negl %ecx
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -586,8 +631,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_v16i16_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: negl %ecx
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -596,8 +642,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpmovmskb %ymm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -622,8 +669,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@@ -635,8 +683,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: negl %ecx
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -645,8 +694,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_v16i16_legal_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: negl %ecx
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -656,8 +706,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
-; AVX512-NEXT: vpmovmskb %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -681,8 +732,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_v16i8_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
@@ -690,8 +742,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; AVX-LABEL: test_v16i8_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: negl %ecx
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
@@ -699,8 +752,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovmskb %xmm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@@ -724,8 +778,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: negl %ecx
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
@@ -737,8 +792,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: negl %ecx
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -747,8 +803,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_v32i8_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: negl %ecx
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -757,8 +814,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpmovmskb %ymm0, %eax
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: negl %ecx
; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
More information about the llvm-commits
mailing list