[llvm] e094abd - [SelectionDAG] Expand [US]CMP using arithmetic on boolean values instead of selects (#98774)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 12:56:23 PDT 2024
Author: Volodymyr Vasylkun
Date: 2024-07-16T20:56:18+01:00
New Revision: e094abde42634e38cda85a6024792f681fc58f32
URL: https://github.com/llvm/llvm-project/commit/e094abde42634e38cda85a6024792f681fc58f32
DIFF: https://github.com/llvm/llvm-project/commit/e094abde42634e38cda85a6024792f681fc58f32.diff
LOG: [SelectionDAG] Expand [US]CMP using arithmetic on boolean values instead of selects (#98774)
The previous expansion of [US]CMP was done using two selects and two
compares. It produced decent code, but on many platforms it is better to
implement [US]CMP nodes by performing the following operation:
```
[us]cmp(x, y) = (x [us]> y) - (x [us]< y)
```
This patch adds the new expansion, along with a hook in TargetLowering that lets targets keep using the select-based approach. AArch64 and SystemZ are currently the only targets that opt to stay on the select-based expansion, but other targets may do the same if it yields better codegen.
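For readers unfamiliar with the two lowerings, the scalar equivalent in plain C++ is shown below. This is an illustrative sketch only, not the SelectionDAG code from the patch; the function names `scmp_selects` and `scmp_sub` are made up for the example.
```
// Standalone sketch contrasting the two expansions of a signed three-way
// compare. Both return -1, 0, or 1.
#include <cassert>
#include <cstdint>

// Previous lowering: two compares feeding two selects.
static int8_t scmp_selects(int32_t x, int32_t y) {
  int8_t ZeroOrOne = (x > y) ? 1 : 0;   // select on the "greater than" result
  return (x < y) ? -1 : ZeroOrOne;      // select on the "less than" result
}

// New default lowering: subtract the two boolean compare results.
static int8_t scmp_sub(int32_t x, int32_t y) {
  int8_t IsGT = (x > y); // 0 or 1
  int8_t IsLT = (x < y); // 0 or 1
  return IsGT - IsLT;    // -1, 0, or 1
}

int main() {
  for (int32_t x = -2; x <= 2; ++x)
    for (int32_t y = -2; y <= 2; ++y)
      assert(scmp_selects(x, y) == scmp_sub(x, y));
  return 0;
}
```
As the ARM, RISC-V, and LoongArch tests below show, the subtraction form usually needs only two flag-setting compares and a subtract, which is why it is now the default unless a target overrides shouldExpandCmpUsingSelects().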
Added:
llvm/test/CodeGen/ARM/scmp.ll
llvm/test/CodeGen/ARM/ucmp.ll
llvm/test/CodeGen/LoongArch/scmp.ll
llvm/test/CodeGen/LoongArch/ucmp.ll
llvm/test/CodeGen/PowerPC/scmp.ll
llvm/test/CodeGen/PowerPC/ucmp.ll
llvm/test/CodeGen/RISCV/scmp.ll
llvm/test/CodeGen/RISCV/ucmp.ll
llvm/test/CodeGen/SystemZ/scmp.ll
llvm/test/CodeGen/SystemZ/ucmp.ll
llvm/test/CodeGen/Thumb/scmp.ll
llvm/test/CodeGen/Thumb/ucmp.ll
llvm/test/CodeGen/WebAssembly/scmp.ll
llvm/test/CodeGen/WebAssembly/ucmp.ll
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/SystemZ/SystemZISelLowering.h
llvm/test/CodeGen/X86/scmp.ll
llvm/test/CodeGen/X86/ucmp.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 06e802314d97c..ef66b82d6f414 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3391,6 +3391,10 @@ class TargetLoweringBase {
return isOperationLegalOrCustom(Op, VT);
}
+ /// Should we expand [US]CMP nodes using two selects and two compares, or by
+ /// doing arithmetic on boolean types
+ virtual bool shouldExpandCmpUsingSelects() const { return false; }
+
/// Does this target support complex deinterleaving
virtual bool isComplexDeinterleavingSupported() const { return false; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 92e18a4b630e9..1433c8821248d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10391,14 +10391,28 @@ SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
-
SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
- SDValue SelectZeroOrOne =
- DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
- DAG.getConstant(0, dl, ResVT));
- return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
- SelectZeroOrOne);
+
+ // We can't perform arithmetic on i1 values. Extending them would
+ // probably result in worse codegen, so let's just use two selects instead.
+ // Some targets are also just better off using selects rather than subtraction
+ // because one of the conditions can be merged with one of the selects.
+ // And finally, if we don't know the contents of high bits of a boolean value
+ // we can't perform any arithmetic either.
+ if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 ||
+ getBooleanContents(BoolVT) == UndefinedBooleanContent) {
+ SDValue SelectZeroOrOne =
+ DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
+ DAG.getConstant(0, dl, ResVT));
+ return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
+ SelectZeroOrOne);
+ }
+
+ if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
+ std::swap(IsGT, IsLT);
+ return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
+ ResVT);
}
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 047c852bb01d2..fcdd47541be82 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -907,6 +907,8 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+ bool shouldExpandCmpUsingSelects() const override { return true; }
+
bool isComplexDeinterleavingSupported() const override;
bool isComplexDeinterleavingOperationSupported(
ComplexDeinterleavingOperation Operation, Type *Ty) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 2290a7d62e89f..1e7285e3e0fc5 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -507,6 +507,8 @@ class SystemZTargetLowering : public TargetLowering {
bool shouldConsiderGEPOffsetSplit() const override { return true; }
+ bool shouldExpandCmpUsingSelects() const override { return true; }
+
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
diff --git a/llvm/test/CodeGen/ARM/scmp.ll b/llvm/test/CodeGen/ARM/scmp.ll
new file mode 100644
index 0000000000000..6e493c993751c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/scmp.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=armv7-unknown-eabi %s -o - | FileCheck %s
+
+define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
+; CHECK-LABEL: scmp_8_8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: movwgt r2, #1
+; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
+; CHECK-LABEL: scmp_8_16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: movwgt r2, #1
+; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_8_32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: movwgt r2, #1
+; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_8_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: subs lr, r0, r2
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: sbcs lr, r1, r3
+; CHECK-NEXT: mov lr, #0
+; CHECK-NEXT: movwlt lr, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: movwlt r12, #1
+; CHECK-NEXT: sub r0, r12, lr
+; CHECK-NEXT: pop {r11, pc}
+ %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: scmp_8_128:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: ldr r4, [sp, #24]
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: ldr r6, [sp, #28]
+; CHECK-NEXT: subs r7, r0, r4
+; CHECK-NEXT: ldr r12, [sp, #32]
+; CHECK-NEXT: sbcs r7, r1, r6
+; CHECK-NEXT: ldr lr, [sp, #36]
+; CHECK-NEXT: sbcs r7, r2, r12
+; CHECK-NEXT: sbcs r7, r3, lr
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r6, r1
+; CHECK-NEXT: sbcs r0, r12, r2
+; CHECK-NEXT: sbcs r0, lr, r3
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: sub r0, r5, r7
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_32_32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: movwgt r2, #1
+; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_32_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: subs lr, r0, r2
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: sbcs lr, r1, r3
+; CHECK-NEXT: mov lr, #0
+; CHECK-NEXT: movwlt lr, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: movwlt r12, #1
+; CHECK-NEXT: sub r0, r12, lr
+; CHECK-NEXT: pop {r11, pc}
+ %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @scmp_64_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_64_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: subs lr, r0, r2
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: sbcs lr, r1, r3
+; CHECK-NEXT: mov lr, #0
+; CHECK-NEXT: movwlt lr, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: movwlt r12, #1
+; CHECK-NEXT: sub r0, r12, lr
+; CHECK-NEXT: asr r1, r0, #31
+; CHECK-NEXT: pop {r11, pc}
+ %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/ARM/ucmp.ll b/llvm/test/CodeGen/ARM/ucmp.ll
new file mode 100644
index 0000000000000..ad4af534ee8fe
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ucmp.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=armv7-unknown-eabi %s -o - | FileCheck %s
+
+define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp_8_8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlo r0, #1
+; CHECK-NEXT: movwhi r2, #1
+; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp_8_16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlo r0, #1
+; CHECK-NEXT: movwhi r2, #1
+; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp_8_32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlo r0, #1
+; CHECK-NEXT: movwhi r2, #1
+; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_8_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: subs lr, r0, r2
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: sbcs lr, r1, r3
+; CHECK-NEXT: mov lr, #0
+; CHECK-NEXT: movwlo lr, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: movwlo r12, #1
+; CHECK-NEXT: sub r0, r12, lr
+; CHECK-NEXT: pop {r11, pc}
+ %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: ucmp_8_128:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: ldr r4, [sp, #24]
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: ldr r6, [sp, #28]
+; CHECK-NEXT: subs r7, r0, r4
+; CHECK-NEXT: ldr r12, [sp, #32]
+; CHECK-NEXT: sbcs r7, r1, r6
+; CHECK-NEXT: ldr lr, [sp, #36]
+; CHECK-NEXT: sbcs r7, r2, r12
+; CHECK-NEXT: sbcs r7, r3, lr
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlo r7, #1
+; CHECK-NEXT: subs r0, r4, r0
+; CHECK-NEXT: sbcs r0, r6, r1
+; CHECK-NEXT: sbcs r0, r12, r2
+; CHECK-NEXT: sbcs r0, lr, r3
+; CHECK-NEXT: movwlo r5, #1
+; CHECK-NEXT: sub r0, r5, r7
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp_32_32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlo r0, #1
+; CHECK-NEXT: movwhi r2, #1
+; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_32_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: subs lr, r0, r2
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: sbcs lr, r1, r3
+; CHECK-NEXT: mov lr, #0
+; CHECK-NEXT: movwlo lr, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: movwlo r12, #1
+; CHECK-NEXT: sub r0, r12, lr
+; CHECK-NEXT: pop {r11, pc}
+ %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_64_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: subs lr, r0, r2
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: sbcs lr, r1, r3
+; CHECK-NEXT: mov lr, #0
+; CHECK-NEXT: movwlo lr, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs r0, r3, r1
+; CHECK-NEXT: movwlo r12, #1
+; CHECK-NEXT: sub r0, r12, lr
+; CHECK-NEXT: asr r1, r0, #31
+; CHECK-NEXT: pop {r11, pc}
+ %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/LoongArch/scmp.ll b/llvm/test/CodeGen/LoongArch/scmp.ll
new file mode 100644
index 0000000000000..69a92968173d2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/scmp.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 -mattr=+d --verify-machineinstrs < %s | FileCheck %s
+
+define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
+; CHECK-LABEL: scmp.8.8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt $a2, $a0, $a1
+; CHECK-NEXT: slt $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
+; CHECK-LABEL: scmp.8.16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt $a2, $a0, $a1
+; CHECK-NEXT: slt $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp.8.32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $a1, $a1, 0
+; CHECK-NEXT: addi.w $a0, $a0, 0
+; CHECK-NEXT: slt $a2, $a0, $a1
+; CHECK-NEXT: slt $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.8.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt $a2, $a0, $a1
+; CHECK-NEXT: slt $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: scmp.8.128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt $a4, $a1, $a3
+; CHECK-NEXT: xor $a5, $a1, $a3
+; CHECK-NEXT: sltui $a5, $a5, 1
+; CHECK-NEXT: masknez $a4, $a4, $a5
+; CHECK-NEXT: sltu $a6, $a0, $a2
+; CHECK-NEXT: maskeqz $a6, $a6, $a5
+; CHECK-NEXT: or $a4, $a6, $a4
+; CHECK-NEXT: slt $a1, $a3, $a1
+; CHECK-NEXT: masknez $a1, $a1, $a5
+; CHECK-NEXT: sltu $a0, $a2, $a0
+; CHECK-NEXT: maskeqz $a0, $a0, $a5
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: sub.d $a0, $a0, $a4
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp.32.32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $a1, $a1, 0
+; CHECK-NEXT: addi.w $a0, $a0, 0
+; CHECK-NEXT: slt $a2, $a0, $a1
+; CHECK-NEXT: slt $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.32.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt $a2, $a0, $a1
+; CHECK-NEXT: slt $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.64.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt $a2, $a0, $a1
+; CHECK-NEXT: slt $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/LoongArch/ucmp.ll b/llvm/test/CodeGen/LoongArch/ucmp.ll
new file mode 100644
index 0000000000000..548c5bd0db72b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ucmp.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 -mattr=+d --verify-machineinstrs < %s | FileCheck %s
+
+define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp.8.8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sltu $a2, $a0, $a1
+; CHECK-NEXT: sltu $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp.8.16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sltu $a2, $a0, $a1
+; CHECK-NEXT: sltu $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp.8.32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrpick.d $a1, $a1, 31, 0
+; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT: sltu $a2, $a0, $a1
+; CHECK-NEXT: sltu $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.8.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sltu $a2, $a0, $a1
+; CHECK-NEXT: sltu $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: ucmp.8.128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sltu $a4, $a1, $a3
+; CHECK-NEXT: xor $a5, $a1, $a3
+; CHECK-NEXT: sltui $a5, $a5, 1
+; CHECK-NEXT: masknez $a4, $a4, $a5
+; CHECK-NEXT: sltu $a6, $a0, $a2
+; CHECK-NEXT: maskeqz $a6, $a6, $a5
+; CHECK-NEXT: or $a4, $a6, $a4
+; CHECK-NEXT: sltu $a1, $a3, $a1
+; CHECK-NEXT: masknez $a1, $a1, $a5
+; CHECK-NEXT: sltu $a0, $a2, $a0
+; CHECK-NEXT: maskeqz $a0, $a0, $a5
+; CHECK-NEXT: or $a0, $a0, $a1
+; CHECK-NEXT: sub.d $a0, $a0, $a4
+; CHECK-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp.32.32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bstrpick.d $a1, $a1, 31, 0
+; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT: sltu $a2, $a0, $a1
+; CHECK-NEXT: sltu $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.32.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sltu $a2, $a0, $a1
+; CHECK-NEXT: sltu $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.64.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sltu $a2, $a0, $a1
+; CHECK-NEXT: sltu $a0, $a1, $a0
+; CHECK-NEXT: sub.d $a0, $a0, $a2
+; CHECK-NEXT: ret
+ %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/PowerPC/scmp.ll b/llvm/test/CodeGen/PowerPC/scmp.ll
new file mode 100644
index 0000000000000..107137c0bea7c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/scmp.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=ppc64le-unknown-unknown %s -o - | FileCheck %s
+
+define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
+; CHECK-LABEL: scmp_8_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpw 3, 4
+; CHECK-NEXT: sub 5, 4, 3
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: rldicl 5, 5, 1, 63
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
+; CHECK-LABEL: scmp_8_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpw 3, 4
+; CHECK-NEXT: sub 5, 4, 3
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: rldicl 5, 5, 1, 63
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_8_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: extsw 4, 4
+; CHECK-NEXT: extsw 3, 3
+; CHECK-NEXT: cmpw 3, 4
+; CHECK-NEXT: sub 3, 4, 3
+; CHECK-NEXT: li 4, -1
+; CHECK-NEXT: rldicl 3, 3, 1, 63
+; CHECK-NEXT: isellt 3, 4, 3
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_8_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sradi 5, 4, 63
+; CHECK-NEXT: rldicl 6, 3, 1, 63
+; CHECK-NEXT: subc 7, 4, 3
+; CHECK-NEXT: adde 5, 6, 5
+; CHECK-NEXT: cmpd 3, 4
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: xori 5, 5, 1
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: scmp_8_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpld 4, 6
+; CHECK-NEXT: cmpd 1, 4, 6
+; CHECK-NEXT: li 4, -1
+; CHECK-NEXT: cmpld 5, 3, 5
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: crandc 22, 5, 2
+; CHECK-NEXT: crand 21, 2, 21
+; CHECK-NEXT: crand 20, 2, 20
+; CHECK-NEXT: crnor 21, 21, 22
+; CHECK-NEXT: isel 3, 0, 3, 21
+; CHECK-NEXT: crandc 21, 4, 2
+; CHECK-NEXT: cror 20, 20, 21
+; CHECK-NEXT: isel 3, 4, 3, 20
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_32_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: extsw 4, 4
+; CHECK-NEXT: extsw 3, 3
+; CHECK-NEXT: cmpw 3, 4
+; CHECK-NEXT: sub 3, 4, 3
+; CHECK-NEXT: li 4, -1
+; CHECK-NEXT: rldicl 3, 3, 1, 63
+; CHECK-NEXT: isellt 3, 4, 3
+; CHECK-NEXT: blr
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_32_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sradi 5, 4, 63
+; CHECK-NEXT: rldicl 6, 3, 1, 63
+; CHECK-NEXT: subc 7, 4, 3
+; CHECK-NEXT: adde 5, 6, 5
+; CHECK-NEXT: cmpd 3, 4
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: xori 5, 5, 1
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @scmp_64_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_64_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sradi 5, 4, 63
+; CHECK-NEXT: rldicl 6, 3, 1, 63
+; CHECK-NEXT: subc 7, 4, 3
+; CHECK-NEXT: adde 5, 6, 5
+; CHECK-NEXT: cmpd 3, 4
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: xori 5, 5, 1
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/PowerPC/ucmp.ll b/llvm/test/CodeGen/PowerPC/ucmp.ll
new file mode 100644
index 0000000000000..d2dff6e7e05c8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ucmp.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=ppc64le-unknown-unknown %s -o - | FileCheck %s
+
+define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp_8_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: sub 5, 4, 3
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: rldicl 5, 5, 1, 63
+; CHECK-NEXT: rldic 3, 3, 0, 32
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp_8_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: sub 5, 4, 3
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: rldicl 5, 5, 1, 63
+; CHECK-NEXT: rldic 3, 3, 0, 32
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp_8_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clrldi 5, 4, 32
+; CHECK-NEXT: clrldi 6, 3, 32
+; CHECK-NEXT: sub 5, 5, 6
+; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: rldic 3, 3, 0, 32
+; CHECK-NEXT: rldicl 5, 5, 1, 63
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_8_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: subc 3, 4, 3
+; CHECK-NEXT: subfe 3, 4, 4
+; CHECK-NEXT: li 4, -1
+; CHECK-NEXT: neg 3, 3
+; CHECK-NEXT: isellt 3, 4, 3
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: ucmp_8_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpld 4, 6
+; CHECK-NEXT: cmpld 1, 3, 5
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: li 4, -1
+; CHECK-NEXT: crandc 20, 1, 2
+; CHECK-NEXT: crand 21, 2, 5
+; CHECK-NEXT: crnor 20, 21, 20
+; CHECK-NEXT: crand 21, 2, 4
+; CHECK-NEXT: isel 3, 0, 3, 20
+; CHECK-NEXT: crandc 20, 0, 2
+; CHECK-NEXT: cror 20, 21, 20
+; CHECK-NEXT: isel 3, 4, 3, 20
+; CHECK-NEXT: blr
+ %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp_32_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clrldi 5, 4, 32
+; CHECK-NEXT: clrldi 6, 3, 32
+; CHECK-NEXT: sub 5, 5, 6
+; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: rldic 3, 3, 0, 32
+; CHECK-NEXT: rldicl 5, 5, 1, 63
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_32_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: subc 3, 4, 3
+; CHECK-NEXT: subfe 3, 4, 4
+; CHECK-NEXT: li 4, -1
+; CHECK-NEXT: neg 3, 3
+; CHECK-NEXT: isellt 3, 4, 3
+; CHECK-NEXT: blr
+ %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_64_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subc 5, 4, 3
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: li 3, -1
+; CHECK-NEXT: subfe 5, 4, 4
+; CHECK-NEXT: neg 5, 5
+; CHECK-NEXT: isellt 3, 3, 5
+; CHECK-NEXT: blr
+ %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/scmp.ll b/llvm/test/CodeGen/RISCV/scmp.ll
new file mode 100644
index 0000000000000..e79b6989410a6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/scmp.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefix=RV32I
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefix=RV64I
+
+define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
+; RV32I-LABEL: scmp.8.8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slt a2, a0, a1
+; RV32I-NEXT: slt a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: scmp.8.8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slt a2, a0, a1
+; RV64I-NEXT: slt a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
+; RV32I-LABEL: scmp.8.16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slt a2, a0, a1
+; RV32I-NEXT: slt a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: scmp.8.16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slt a2, a0, a1
+; RV64I-NEXT: slt a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: scmp.8.32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slt a2, a0, a1
+; RV32I-NEXT: slt a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: scmp.8.32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a1, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slt a2, a0, a1
+; RV64I-NEXT: slt a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: scmp.8.64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB3_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a1, a3
+; RV32I-NEXT: slt a0, a3, a1
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: scmp.8.64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slt a2, a0, a1
+; RV64I-NEXT: slt a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
+; RV32I-LABEL: scmp.8.128:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lw a2, 4(a1)
+; RV32I-NEXT: lw a3, 4(a0)
+; RV32I-NEXT: lw a4, 8(a1)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a6, 12(a0)
+; RV32I-NEXT: lw a7, 8(a0)
+; RV32I-NEXT: beq a6, a5, .LBB4_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt t2, a6, a5
+; RV32I-NEXT: j .LBB4_3
+; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: sltu t2, a7, a4
+; RV32I-NEXT: .LBB4_3:
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: lw t0, 0(a0)
+; RV32I-NEXT: beq a3, a2, .LBB4_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: sltu a0, a3, a2
+; RV32I-NEXT: j .LBB4_6
+; RV32I-NEXT: .LBB4_5:
+; RV32I-NEXT: sltu a0, t0, a1
+; RV32I-NEXT: .LBB4_6:
+; RV32I-NEXT: xor t1, a6, a5
+; RV32I-NEXT: xor t3, a7, a4
+; RV32I-NEXT: or t1, t3, t1
+; RV32I-NEXT: beqz t1, .LBB4_8
+; RV32I-NEXT: # %bb.7:
+; RV32I-NEXT: mv a0, t2
+; RV32I-NEXT: .LBB4_8:
+; RV32I-NEXT: beq a6, a5, .LBB4_11
+; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: slt a4, a5, a6
+; RV32I-NEXT: bne a3, a2, .LBB4_12
+; RV32I-NEXT: .LBB4_10:
+; RV32I-NEXT: sltu a1, a1, t0
+; RV32I-NEXT: bnez t1, .LBB4_13
+; RV32I-NEXT: j .LBB4_14
+; RV32I-NEXT: .LBB4_11:
+; RV32I-NEXT: sltu a4, a4, a7
+; RV32I-NEXT: beq a3, a2, .LBB4_10
+; RV32I-NEXT: .LBB4_12:
+; RV32I-NEXT: sltu a1, a2, a3
+; RV32I-NEXT: beqz t1, .LBB4_14
+; RV32I-NEXT: .LBB4_13:
+; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: .LBB4_14:
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: scmp.8.128:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beq a1, a3, .LBB4_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: slt a4, a1, a3
+; RV64I-NEXT: slt a0, a3, a1
+; RV64I-NEXT: sub a0, a0, a4
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: sltu a4, a0, a2
+; RV64I-NEXT: sltu a0, a2, a0
+; RV64I-NEXT: sub a0, a0, a4
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: scmp.32.32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slt a2, a0, a1
+; RV32I-NEXT: slt a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: scmp.32.32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sext.w a1, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slt a2, a0, a1
+; RV64I-NEXT: slt a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: scmp.32.64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a1, a3
+; RV32I-NEXT: slt a0, a3, a1
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB6_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: scmp.32.64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slt a2, a0, a1
+; RV64I-NEXT: slt a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: scmp.64.64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a1, a3
+; RV32I-NEXT: slt a0, a3, a1
+; RV32I-NEXT: j .LBB7_3
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: .LBB7_3:
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: scmp.64.64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slt a2, a0, a1
+; RV64I-NEXT: slt a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/ucmp.ll b/llvm/test/CodeGen/RISCV/ucmp.ll
new file mode 100644
index 0000000000000..026340ede1f90
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/ucmp.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefix=RV32I
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefix=RV64I
+
+define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
+; RV32I-LABEL: ucmp.8.8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.8.8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
+; RV32I-LABEL: ucmp.8.16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.8.16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: ucmp.8.32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.8.32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: ucmp.8.64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB3_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a3
+; RV32I-NEXT: sltu a0, a3, a1
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.8.64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
+; RV32I-LABEL: ucmp.8.128:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lw a2, 4(a1)
+; RV32I-NEXT: lw a3, 4(a0)
+; RV32I-NEXT: lw a4, 8(a1)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a6, 12(a0)
+; RV32I-NEXT: lw a7, 8(a0)
+; RV32I-NEXT: beq a6, a5, .LBB4_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu t2, a6, a5
+; RV32I-NEXT: j .LBB4_3
+; RV32I-NEXT: .LBB4_2:
+; RV32I-NEXT: sltu t2, a7, a4
+; RV32I-NEXT: .LBB4_3:
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: lw t0, 0(a0)
+; RV32I-NEXT: beq a3, a2, .LBB4_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: sltu a0, a3, a2
+; RV32I-NEXT: j .LBB4_6
+; RV32I-NEXT: .LBB4_5:
+; RV32I-NEXT: sltu a0, t0, a1
+; RV32I-NEXT: .LBB4_6:
+; RV32I-NEXT: xor t1, a6, a5
+; RV32I-NEXT: xor t3, a7, a4
+; RV32I-NEXT: or t1, t3, t1
+; RV32I-NEXT: beqz t1, .LBB4_8
+; RV32I-NEXT: # %bb.7:
+; RV32I-NEXT: mv a0, t2
+; RV32I-NEXT: .LBB4_8:
+; RV32I-NEXT: beq a6, a5, .LBB4_11
+; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: sltu a4, a5, a6
+; RV32I-NEXT: bne a3, a2, .LBB4_12
+; RV32I-NEXT: .LBB4_10:
+; RV32I-NEXT: sltu a1, a1, t0
+; RV32I-NEXT: bnez t1, .LBB4_13
+; RV32I-NEXT: j .LBB4_14
+; RV32I-NEXT: .LBB4_11:
+; RV32I-NEXT: sltu a4, a4, a7
+; RV32I-NEXT: beq a3, a2, .LBB4_10
+; RV32I-NEXT: .LBB4_12:
+; RV32I-NEXT: sltu a1, a2, a3
+; RV32I-NEXT: beqz t1, .LBB4_14
+; RV32I-NEXT: .LBB4_13:
+; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: .LBB4_14:
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.8.128:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beq a1, a3, .LBB4_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: sltu a4, a1, a3
+; RV64I-NEXT: sltu a0, a3, a1
+; RV64I-NEXT: sub a0, a0, a4
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: sltu a4, a0, a2
+; RV64I-NEXT: sltu a0, a2, a0
+; RV64I-NEXT: sub a0, a0, a4
+; RV64I-NEXT: ret
+ %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: ucmp.32.32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.32.32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: ucmp.32.64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a3
+; RV32I-NEXT: sltu a0, a3, a1
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB6_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.32.64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: ucmp.64.64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a3
+; RV32I-NEXT: sltu a0, a3, a1
+; RV32I-NEXT: j .LBB7_3
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: .LBB7_3:
+; RV32I-NEXT: sub a0, a0, a4
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.64.64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/SystemZ/scmp.ll b/llvm/test/CodeGen/SystemZ/scmp.ll
new file mode 100644
index 0000000000000..3ecaa60a58d24
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/scmp.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
+; CHECK-LABEL: scmp.8.8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
+; CHECK-LABEL: scmp.8.16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp.8.32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.8.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cgr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: scmp.8.128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vecg %v0, %v1
+; CHECK-NEXT: jlh .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vchlgs %v2, %v1, %v0
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochil %r2, 1
+; CHECK-NEXT: vecg %v1, %v0
+; CHECK-NEXT: jlh .LBB4_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: vchlgs %v0, %v0, %v1
+; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp.32.32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.32.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cgr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.64.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cgr %r2, %r3
+; CHECK-NEXT: lghi %r2, 0
+; CHECK-NEXT: locghih %r2, 1
+; CHECK-NEXT: locghil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/SystemZ/ucmp.ll b/llvm/test/CodeGen/SystemZ/ucmp.ll
new file mode 100644
index 0000000000000..4175cd7850a98
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/ucmp.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp.8.8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp.8.16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp.8.32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.8.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clgr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: ucmp.8.128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: veclg %v0, %v1
+; CHECK-NEXT: jlh .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vchlgs %v2, %v1, %v0
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochil %r2, 1
+; CHECK-NEXT: veclg %v1, %v0
+; CHECK-NEXT: jlh .LBB4_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: vchlgs %v0, %v0, %v1
+; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp.32.32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.32.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clgr %r2, %r3
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochih %r2, 1
+; CHECK-NEXT: lochil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.64.64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clgr %r2, %r3
+; CHECK-NEXT: lghi %r2, 0
+; CHECK-NEXT: locghih %r2, 1
+; CHECK-NEXT: locghil %r2, -1
+; CHECK-NEXT: br %r14
+ %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/Thumb/scmp.ll b/llvm/test/CodeGen/Thumb/scmp.ll
new file mode 100644
index 0000000000000..661dbe97cdb3c
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb/scmp.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s
+
+define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
+; CHECK-LABEL: scmp_8_8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: mov.w r2, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r2, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
+; CHECK-LABEL: scmp_8_16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: mov.w r2, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r2, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_8_32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: mov.w r2, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r2, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_8_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: subs.w r12, r0, r2
+; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: sbcs.w r12, r1, r3
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt.w r12, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs.w r0, r3, r1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt.w r9, #1
+; CHECK-NEXT: sub.w r0, r9, r12
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: scmp_8_128:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: add.w lr, sp, #16
+; CHECK-NEXT: ldr r4, [sp, #28]
+; CHECK-NEXT: movs r5, #0
+; CHECK-NEXT: ldm.w lr, {r9, r12, lr}
+; CHECK-NEXT: subs.w r6, r0, r9
+; CHECK-NEXT: sbcs.w r6, r1, r12
+; CHECK-NEXT: sbcs.w r6, r2, lr
+; CHECK-NEXT: sbcs.w r6, r3, r4
+; CHECK-NEXT: mov.w r6, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r6, #1
+; CHECK-NEXT: subs.w r0, r9, r0
+; CHECK-NEXT: sbcs.w r0, r12, r1
+; CHECK-NEXT: sbcs.w r0, lr, r2
+; CHECK-NEXT: sbcs.w r0, r4, r3
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r5, #1
+; CHECK-NEXT: subs r0, r5, r6
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+ %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_32_32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: mov.w r2, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r2, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_32_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: subs.w r12, r0, r2
+; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: sbcs.w r12, r1, r3
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt.w r12, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs.w r0, r3, r1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt.w r9, #1
+; CHECK-NEXT: sub.w r0, r9, r12
+; CHECK-NEXT: bx lr
+ %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @scmp_64_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp_64_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: subs.w r12, r0, r2
+; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: sbcs.w r12, r1, r3
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt.w r12, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs.w r0, r3, r1
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt.w r9, #1
+; CHECK-NEXT: sub.w r0, r9, r12
+; CHECK-NEXT: asrs r1, r0, #31
+; CHECK-NEXT: bx lr
+ %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/Thumb/ucmp.ll b/llvm/test/CodeGen/Thumb/ucmp.ll
new file mode 100644
index 0000000000000..7e6d0a323b11c
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb/ucmp.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s
+
+define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp_8_8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: mov.w r2, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: it hi
+; CHECK-NEXT: movhi r2, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp_8_16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: mov.w r2, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: it hi
+; CHECK-NEXT: movhi r2, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp_8_32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: mov.w r2, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: it hi
+; CHECK-NEXT: movhi r2, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_8_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: subs.w r12, r0, r2
+; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: sbcs.w r12, r1, r3
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo.w r12, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs.w r0, r3, r1
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo.w r9, #1
+; CHECK-NEXT: sub.w r0, r9, r12
+; CHECK-NEXT: bx lr
+ %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: ucmp_8_128:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: add.w lr, sp, #16
+; CHECK-NEXT: ldr r4, [sp, #28]
+; CHECK-NEXT: movs r5, #0
+; CHECK-NEXT: ldm.w lr, {r9, r12, lr}
+; CHECK-NEXT: subs.w r6, r0, r9
+; CHECK-NEXT: sbcs.w r6, r1, r12
+; CHECK-NEXT: sbcs.w r6, r2, lr
+; CHECK-NEXT: sbcs.w r6, r3, r4
+; CHECK-NEXT: mov.w r6, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r6, #1
+; CHECK-NEXT: subs.w r0, r9, r0
+; CHECK-NEXT: sbcs.w r0, r12, r1
+; CHECK-NEXT: sbcs.w r0, lr, r2
+; CHECK-NEXT: sbcs.w r0, r4, r3
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r5, #1
+; CHECK-NEXT: subs r0, r5, r6
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+ %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp_32_32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: mov.w r0, #0
+; CHECK-NEXT: mov.w r2, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: it hi
+; CHECK-NEXT: movhi r2, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: bx lr
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_32_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: subs.w r12, r0, r2
+; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: sbcs.w r12, r1, r3
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo.w r12, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs.w r0, r3, r1
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo.w r9, #1
+; CHECK-NEXT: sub.w r0, r9, r12
+; CHECK-NEXT: bx lr
+ %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp_64_64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: subs.w r12, r0, r2
+; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: sbcs.w r12, r1, r3
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo.w r12, #1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs.w r0, r3, r1
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo.w r9, #1
+; CHECK-NEXT: sub.w r0, r9, r12
+; CHECK-NEXT: asrs r1, r0, #31
+; CHECK-NEXT: bx lr
+ %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/WebAssembly/scmp.ll b/llvm/test/CodeGen/WebAssembly/scmp.ll
new file mode 100644
index 0000000000000..60ab6ef2f527a
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/scmp.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -wasm-keep-registers | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind {
+; CHECK-LABEL: scmp.8.8:
+; CHECK: .functype scmp.8.8 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i32.gt_s $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i32.lt_s $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind {
+; CHECK-LABEL: scmp.8.16:
+; CHECK: .functype scmp.8.16 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i32.gt_s $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i32.lt_s $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp.8.32:
+; CHECK: .functype scmp.8.32 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i32.gt_s $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i32.lt_s $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.8.64:
+; CHECK: .functype scmp.8.64 (i64, i64) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i64.gt_s $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i64.lt_s $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: scmp.8.128:
+; CHECK: .functype scmp.8.128 (i64, i64, i64, i64) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push10=, 0
+; CHECK-NEXT: local.get $push9=, 2
+; CHECK-NEXT: i64.gt_u $push4=, $pop10, $pop9
+; CHECK-NEXT: local.get $push12=, 1
+; CHECK-NEXT: local.get $push11=, 3
+; CHECK-NEXT: i64.gt_s $push3=, $pop12, $pop11
+; CHECK-NEXT: local.get $push14=, 1
+; CHECK-NEXT: local.get $push13=, 3
+; CHECK-NEXT: i64.eq $push8=, $pop14, $pop13
+; CHECK-NEXT: local.tee $push7=, 4, $pop8
+; CHECK-NEXT: i32.select $push5=, $pop4, $pop3, $pop7
+; CHECK-NEXT: local.get $push16=, 0
+; CHECK-NEXT: local.get $push15=, 2
+; CHECK-NEXT: i64.lt_u $push1=, $pop16, $pop15
+; CHECK-NEXT: local.get $push18=, 1
+; CHECK-NEXT: local.get $push17=, 3
+; CHECK-NEXT: i64.lt_s $push0=, $pop18, $pop17
+; CHECK-NEXT: local.get $push19=, 4
+; CHECK-NEXT: i32.select $push2=, $pop1, $pop0, $pop19
+; CHECK-NEXT: i32.sub $push6=, $pop5, $pop2
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp.32.32:
+; CHECK: .functype scmp.32.32 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i32.gt_s $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i32.lt_s $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.32.64:
+; CHECK: .functype scmp.32.64 (i64, i64) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i64.gt_s $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i64.lt_s $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scmp.64.64:
+; CHECK: .functype scmp.64.64 (i64, i64) -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push5=, 0
+; CHECK-NEXT: local.get $push4=, 1
+; CHECK-NEXT: i64.gt_s $push1=, $pop5, $pop4
+; CHECK-NEXT: local.get $push7=, 0
+; CHECK-NEXT: local.get $push6=, 1
+; CHECK-NEXT: i64.lt_s $push0=, $pop7, $pop6
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: i64.extend_i32_s $push3=, $pop2
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/WebAssembly/ucmp.ll b/llvm/test/CodeGen/WebAssembly/ucmp.ll
new file mode 100644
index 0000000000000..ab7f9b2bab1da
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ucmp.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -wasm-keep-registers | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp.8.8:
+; CHECK: .functype ucmp.8.8 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i32.gt_u $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i32.lt_u $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
+; CHECK-LABEL: ucmp.8.16:
+; CHECK: .functype ucmp.8.16 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i32.gt_u $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i32.lt_u $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp.8.32:
+; CHECK: .functype ucmp.8.32 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i32.gt_u $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i32.lt_u $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.8.64:
+; CHECK: .functype ucmp.8.64 (i64, i64) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i64.gt_u $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i64.lt_u $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
+ ret i8 %1
+}
+
+define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: ucmp.8.128:
+; CHECK: .functype ucmp.8.128 (i64, i64, i64, i64) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push10=, 0
+; CHECK-NEXT: local.get $push9=, 2
+; CHECK-NEXT: i64.gt_u $push4=, $pop10, $pop9
+; CHECK-NEXT: local.get $push12=, 1
+; CHECK-NEXT: local.get $push11=, 3
+; CHECK-NEXT: i64.gt_u $push3=, $pop12, $pop11
+; CHECK-NEXT: local.get $push14=, 1
+; CHECK-NEXT: local.get $push13=, 3
+; CHECK-NEXT: i64.eq $push8=, $pop14, $pop13
+; CHECK-NEXT: local.tee $push7=, 4, $pop8
+; CHECK-NEXT: i32.select $push5=, $pop4, $pop3, $pop7
+; CHECK-NEXT: local.get $push16=, 0
+; CHECK-NEXT: local.get $push15=, 2
+; CHECK-NEXT: i64.lt_u $push1=, $pop16, $pop15
+; CHECK-NEXT: local.get $push18=, 1
+; CHECK-NEXT: local.get $push17=, 3
+; CHECK-NEXT: i64.lt_u $push0=, $pop18, $pop17
+; CHECK-NEXT: local.get $push19=, 4
+; CHECK-NEXT: i32.select $push2=, $pop1, $pop0, $pop19
+; CHECK-NEXT: i32.sub $push6=, $pop5, $pop2
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: ucmp.32.32:
+; CHECK: .functype ucmp.32.32 (i32, i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i32.gt_u $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i32.lt_u $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.32.64:
+; CHECK: .functype ucmp.32.64 (i64, i64) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push4=, 0
+; CHECK-NEXT: local.get $push3=, 1
+; CHECK-NEXT: i64.gt_u $push1=, $pop4, $pop3
+; CHECK-NEXT: local.get $push6=, 0
+; CHECK-NEXT: local.get $push5=, 1
+; CHECK-NEXT: i64.lt_u $push0=, $pop6, $pop5
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
+ ret i32 %1
+}
+
+define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ucmp.64.64:
+; CHECK: .functype ucmp.64.64 (i64, i64) -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get $push5=, 0
+; CHECK-NEXT: local.get $push4=, 1
+; CHECK-NEXT: i64.gt_u $push1=, $pop5, $pop4
+; CHECK-NEXT: local.get $push7=, 0
+; CHECK-NEXT: local.get $push6=, 1
+; CHECK-NEXT: i64.lt_u $push0=, $pop7, $pop6
+; CHECK-NEXT: i32.sub $push2=, $pop1, $pop0
+; CHECK-NEXT: i64.extend_i32_s $push3=, $pop2
+; CHECK-NEXT: # fallthrough-return
+ %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll
index 55dc0d6059e05..7d4bbb06534e6 100644
--- a/llvm/test/CodeGen/X86/scmp.ll
+++ b/llvm/test/CodeGen/X86/scmp.ll
@@ -5,24 +5,19 @@
define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
; X64-LABEL: scmp.8.8:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpb %sil, %dil
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB0_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB0_2:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -31,24 +26,19 @@ define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
; X64-LABEL: scmp.8.16:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpw %si, %di
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
-; X86-NEXT: setg %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -57,24 +47,19 @@ define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp.8.32:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setg %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB2_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB2_2:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -83,35 +68,32 @@ define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.8.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB3_2:
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i64 %x, i64 %y)
ret i8 %1
@@ -120,16 +102,14 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; X64-LABEL: scmp.8.128:
; X64: # %bb.0:
+; X64-NEXT: cmpq %rdx, %rdi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: sbbq %rcx, %rax
+; X64-NEXT: setl %r8b
; X64-NEXT: cmpq %rdi, %rdx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: sbbq %rsi, %rax
+; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: setl %al
-; X64-NEXT: movzbl %al, %r8d
-; X64-NEXT: cmpq %rdx, %rdi
-; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %r8d, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: subb %r8b, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.128:
@@ -142,26 +122,23 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebp, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sbbl %eax, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: sbbl %edx, %ebp
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: sbbl %ebp, %ecx
; X86-NEXT: setl %cl
; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl %ebp, %edx
+; X86-NEXT: sbbl %ebx, %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB4_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB4_2:
+; X86-NEXT: sbbl %esi, %ebp
+; X86-NEXT: setl %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -174,25 +151,21 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp.32.32:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovgel %ecx, %eax
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
; X64-NEXT: retq
;
; X86-LABEL: scmp.32.32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setg %dl
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jl .LBB5_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %dl, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB5_2:
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movsbl %cl, %eax
; X86-NEXT: retl
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
@@ -201,34 +174,34 @@ define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.32.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovgel %ecx, %eax
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
; X64-NEXT: retq
;
; X86-LABEL: scmp.32.64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jl .LBB6_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: .LBB6_2:
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
+; X86-NEXT: movsbl %al, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i32 @llvm.scmp(i64 %x, i64 %y)
ret i32 %1
@@ -237,36 +210,36 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.64.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: cmovgeq %rcx, %rax
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbq %cl, %rax
; X64-NEXT: retq
;
; X86-LABEL: scmp.64.64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: jl .LBB7_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: .LBB7_2:
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i64 @llvm.scmp(i64 %x, i64 %y)
ret i64 %1
@@ -275,24 +248,19 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
define i4 @scmp_narrow_result(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp_narrow_result:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_narrow_result:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setg %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB8_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB8_2:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%1 = call i4 @llvm.scmp(i32 %x, i32 %y)
ret i4 %1
@@ -305,39 +273,36 @@ define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind {
; X64-NEXT: sarq $2, %rsi
; X64-NEXT: shlq $2, %rdi
; X64-NEXT: sarq $2, %rdi
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_narrow_op:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $2, %eax
; X86-NEXT: sarl $2, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shll $2, %edi
-; X86-NEXT: sarl $2, %edi
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setl %dl
-; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB9_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB9_2:
+; X86-NEXT: shll $2, %esi
+; X86-NEXT: sarl $2, %esi
+; X86-NEXT: cmpl %ecx, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i62 %x, i62 %y)
ret i8 %1
@@ -346,39 +311,33 @@ define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind {
define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp_wide_result:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: cmovgeq %rcx, %rax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbq %cl, %rax
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: sarq $63, %rdx
+; X64-NEXT: movl %edx, %ecx
+; X64-NEXT: andl $8191, %ecx # imm = 0x1FFF
; X64-NEXT: retq
;
; X86-LABEL: scmp_wide_result:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: setg %bl
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jl .LBB10_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: movb %bl, %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: .LBB10_2:
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl $0, 12(%eax)
-; X86-NEXT: movl $0, 8(%eax)
-; X86-NEXT: movw $0, 16(%eax)
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %cl, %dl
+; X86-NEXT: movsbl %dl, %ecx
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: andl $8191, %ecx # imm = 0x1FFF
+; X86-NEXT: movw %cx, 16(%eax)
; X86-NEXT: retl $4
%1 = call i141 @llvm.scmp(i32 %x, i32 %y)
ret i141 %1
@@ -387,20 +346,18 @@ define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind {
define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind {
; X64-LABEL: scmp_wide_op:
; X64: # %bb.0:
-; X64-NEXT: shlq $19, %rsi
-; X64-NEXT: sarq $19, %rsi
; X64-NEXT: shlq $19, %rcx
; X64-NEXT: sarq $19, %rcx
+; X64-NEXT: shlq $19, %rsi
+; X64-NEXT: sarq $19, %rsi
+; X64-NEXT: cmpq %rdx, %rdi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: sbbq %rcx, %rax
+; X64-NEXT: setl %r8b
; X64-NEXT: cmpq %rdi, %rdx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: sbbq %rsi, %rax
+; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: setl %al
-; X64-NEXT: movzbl %al, %r8d
-; X64-NEXT: cmpq %rdx, %rdi
-; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %r8d, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: subb %r8b, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_wide_op:
@@ -409,35 +366,31 @@ define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $19, %eax
; X86-NEXT: sarl $19, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll $19, %ecx
; X86-NEXT: sarl $19, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %edx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebp, %esi
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: sbbl %eax, %esi
-; X86-NEXT: setl {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %ebp, {{[0-9]+}}(%esp)
+; X86-NEXT: setl %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl %edi, %ebx
+; X86-NEXT: sbbl %edi, %ebp
; X86-NEXT: sbbl %ecx, %eax
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB11_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X86-NEXT: .LBB11_2:
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -454,34 +407,28 @@ define i41 @scmp_uncommon_types(i7 %x, i7 %y) nounwind {
; X64-NEXT: sarb %sil
; X64-NEXT: addb %dil, %dil
; X64-NEXT: sarb %dil
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpb %sil, %dil
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: cmovgeq %rcx, %rax
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbq %cl, %rax
; X64-NEXT: retq
;
; X86-LABEL: scmp_uncommon_types:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: addb %dl, %dl
-; X86-NEXT: sarb %dl
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpb %al, %dl
-; X86-NEXT: setg %bl
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: jl .LBB12_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movb %bl, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB12_2:
-; X86-NEXT: popl %ebx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: addb %cl, %cl
+; X86-NEXT: sarb %cl
+; X86-NEXT: cmpb %al, %cl
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movsbl %cl, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: retl
%1 = call i41 @llvm.scmp(i7 %x, i7 %y)
ret i41 %1
@@ -494,38 +441,41 @@ define <4 x i32> @scmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movd %xmm2, %eax
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
; X64-NEXT: movd %xmm2, %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: setg %dl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm2
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
-; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: movd %xmm3, %eax
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
-; X64-NEXT: movd %xmm3, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; X64-NEXT: movd %xmm1, %ecx
-; X64-NEXT: movd %xmm0, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %xmm1, %eax
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: movd %xmm1, %eax
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X64-NEXT: movd %xmm0, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X64-NEXT: movdqa %xmm2, %xmm0
@@ -533,59 +483,41 @@ define <4 x i32> @scmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; X86-LABEL: scmp_normal_vectors:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jl .LBB13_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: .LBB13_2:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jl .LBB13_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: .LBB13_4:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: setl %dl
+; X86-NEXT: setg %dh
+; X86-NEXT: subb %dl, %dh
+; X86-NEXT: movsbl %dh, %edx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jl .LBB13_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: .LBB13_6:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %bl
+; X86-NEXT: setg %bh
+; X86-NEXT: subb %bl, %bh
+; X86-NEXT: movsbl %bh, %edi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: setl %bl
+; X86-NEXT: setg %bh
+; X86-NEXT: subb %bl, %bh
+; X86-NEXT: movsbl %bh, %esi
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: setg %cl
-; X86-NEXT: jl .LBB13_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: .LBB13_8:
-; X86-NEXT: movl %edx, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %ch
+; X86-NEXT: subb %cl, %ch
+; X86-NEXT: movsbl %ch, %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%1 = call <4 x i32> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %1
@@ -596,45 +528,45 @@ define <4 x i8> @scmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movd %xmm1, %eax
; X64-NEXT: movd %xmm0, %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: setg %dl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movzbl %dl, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
-; X64-NEXT: movd %xmm2, %edx
+; X64-NEXT: movd %xmm2, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
-; X64-NEXT: movd %xmm2, %esi
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpl %edx, %esi
-; X64-NEXT: setg %dil
-; X64-NEXT: cmovll %eax, %edi
-; X64-NEXT: movzbl %dil, %edx
-; X64-NEXT: shll $8, %edx
-; X64-NEXT: orl %ecx, %edx
+; X64-NEXT: movd %xmm2, %edx
+; X64-NEXT: cmpl %ecx, %edx
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %dl
+; X64-NEXT: subb %cl, %dl
+; X64-NEXT: movzbl %dl, %ecx
+; X64-NEXT: shll $8, %ecx
+; X64-NEXT: orl %eax, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: movd %xmm2, %eax
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; X64-NEXT: movd %xmm2, %esi
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpl %ecx, %esi
-; X64-NEXT: setg %dil
-; X64-NEXT: cmovll %eax, %edi
-; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: shll $16, %ecx
-; X64-NEXT: orl %edx, %ecx
+; X64-NEXT: movd %xmm2, %edx
+; X64-NEXT: cmpl %eax, %edx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %dl
+; X64-NEXT: subb %al, %dl
+; X64-NEXT: movzbl %dl, %eax
+; X64-NEXT: shll $16, %eax
+; X64-NEXT: orl %ecx, %eax
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; X64-NEXT: movd %xmm1, %edx
+; X64-NEXT: movd %xmm1, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; X64-NEXT: movd %xmm0, %esi
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpl %edx, %esi
-; X64-NEXT: setg %dil
-; X64-NEXT: cmovll %eax, %edi
-; X64-NEXT: shll $24, %edi
-; X64-NEXT: orl %ecx, %edi
-; X64-NEXT: movd %edi, %xmm0
+; X64-NEXT: movd %xmm0, %edx
+; X64-NEXT: cmpl %ecx, %edx
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %dl
+; X64-NEXT: subb %cl, %dl
+; X64-NEXT: movzbl %dl, %ecx
+; X64-NEXT: shll $24, %ecx
+; X64-NEXT: orl %eax, %ecx
+; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: scmp_narrow_vec_result:
@@ -643,41 +575,29 @@ define <4 x i8> @scmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: setg %ch
-; X86-NEXT: movb $-1, %dl
-; X86-NEXT: movb $-1, %cl
-; X86-NEXT: jl .LBB14_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %ch, %cl
-; X86-NEXT: .LBB14_2:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setg %al
-; X86-NEXT: movb $-1, %ch
-; X86-NEXT: jl .LBB14_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %al, %ch
-; X86-NEXT: .LBB14_4:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: setg %bl
-; X86-NEXT: movb $-1, %dh
-; X86-NEXT: jl .LBB14_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %bl, %dh
-; X86-NEXT: .LBB14_6:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %ch
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %ch, %cl
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: setl %ch
; X86-NEXT: setg %bl
-; X86-NEXT: jl .LBB14_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %bl, %dl
-; X86-NEXT: .LBB14_8:
-; X86-NEXT: movb %dl, 3(%eax)
-; X86-NEXT: movb %dh, 2(%eax)
-; X86-NEXT: movb %ch, 1(%eax)
+; X86-NEXT: subb %ch, %bl
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: setl %ch
+; X86-NEXT: setg %bh
+; X86-NEXT: subb %ch, %bh
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: setl %dl
+; X86-NEXT: setg %ch
+; X86-NEXT: subb %dl, %ch
+; X86-NEXT: movb %ch, 3(%eax)
+; X86-NEXT: movb %bh, 2(%eax)
+; X86-NEXT: movb %bl, 1(%eax)
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -700,97 +620,82 @@ define <4 x i32> @scmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind {
; X64-NEXT: psrad $24, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,3,3,3]
; X64-NEXT: movd %xmm0, %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: setg %dl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm0
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
-; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: movd %xmm3, %eax
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
-; X64-NEXT: movd %xmm3, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
-; X64-NEXT: movd %xmm1, %ecx
-; X64-NEXT: movd %xmm2, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: movd %xmm1, %eax
+; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: movd %xmm1, %eax
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
-; X64-NEXT: movd %xmm1, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; X64-NEXT: retq
;
; X86-LABEL: scmp_narrow_vec_op:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jl .LBB15_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: .LBB15_2:
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %bl
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jl .LBB15_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %bl, %al
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: .LBB15_4:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: setl %dl
+; X86-NEXT: setg %dh
+; X86-NEXT: subb %dl, %dh
+; X86-NEXT: movsbl %dh, %edx
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl
+; X86-NEXT: setl %bl
+; X86-NEXT: setg %bh
+; X86-NEXT: subb %bl, %bh
+; X86-NEXT: movsbl %bh, %esi
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jl .LBB15_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: .LBB15_6:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %ch
+; X86-NEXT: setg %bl
+; X86-NEXT: subb %ch, %bl
+; X86-NEXT: movsbl %bl, %edi
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %cl
-; X86-NEXT: jl .LBB15_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: .LBB15_8:
-; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %ch
+; X86-NEXT: subb %cl, %ch
+; X86-NEXT: movsbl %ch, %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl %edi, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%1 = call <4 x i32> @llvm.scmp(<4 x i8> %x, <4 x i8> %y)
ret <4 x i32> %1
@@ -811,163 +716,178 @@ define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
; X64-NEXT: psrad $24, %xmm6
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm6[3,3,3,3]
; X64-NEXT: movd %xmm0, %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: setg %dl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm0
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
-; X64-NEXT: movd %xmm7, %ecx
+; X64-NEXT: movd %xmm7, %eax
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
-; X64-NEXT: movd %xmm7, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm7
+; X64-NEXT: movd %xmm7, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm7
; X64-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
-; X64-NEXT: movd %xmm5, %ecx
-; X64-NEXT: movd %xmm6, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: movd %xmm5, %eax
+; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
-; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: movd %xmm5, %eax
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,1,1]
-; X64-NEXT: movd %xmm5, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm5
+; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm5
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm7[0]
; X64-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; X64-NEXT: psrad $24, %xmm5
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm5[3,3,3,3]
-; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: movd %xmm1, %eax
; X64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
; X64-NEXT: psrad $24, %xmm4
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
-; X64-NEXT: movd %xmm1, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm5[2,3,2,3]
-; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: movd %xmm6, %eax
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,2,3]
-; X64-NEXT: movd %xmm6, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm6
+; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm6
; X64-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1]
-; X64-NEXT: movd %xmm5, %ecx
-; X64-NEXT: movd %xmm4, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %xmm5, %eax
+; X64-NEXT: movd %xmm4, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
-; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: movd %xmm5, %eax
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,1,1]
-; X64-NEXT: movd %xmm4, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm4
+; X64-NEXT: movd %xmm4, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm4
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm6[0]
; X64-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
; X64-NEXT: psrad $24, %xmm5
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[3,3,3,3]
-; X64-NEXT: movd %xmm4, %ecx
+; X64-NEXT: movd %xmm4, %eax
; X64-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
; X64-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3]
; X64-NEXT: psrad $24, %xmm6
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm6[3,3,3,3]
-; X64-NEXT: movd %xmm2, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
-; X64-NEXT: movd %xmm7, %ecx
+; X64-NEXT: movd %xmm7, %eax
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
-; X64-NEXT: movd %xmm7, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm7
+; X64-NEXT: movd %xmm7, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm7
; X64-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1]
-; X64-NEXT: movd %xmm5, %ecx
-; X64-NEXT: movd %xmm6, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %xmm5, %eax
+; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
-; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: movd %xmm5, %eax
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,1,1]
-; X64-NEXT: movd %xmm5, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm5
+; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm5
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
; X64-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
; X64-NEXT: psrad $24, %xmm5
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm5[3,3,3,3]
-; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: movd %xmm3, %eax
; X64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
; X64-NEXT: psrad $24, %xmm4
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm4[3,3,3,3]
-; X64-NEXT: movd %xmm3, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm5[2,3,2,3]
-; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: movd %xmm6, %eax
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,2,3]
-; X64-NEXT: movd %xmm6, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm6
+; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm6
; X64-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
-; X64-NEXT: movd %xmm5, %ecx
-; X64-NEXT: movd %xmm4, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %xmm5, %eax
+; X64-NEXT: movd %xmm4, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
-; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: movd %xmm5, %eax
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,1,1]
-; X64-NEXT: movd %xmm4, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movd %esi, %xmm4
+; X64-NEXT: movd %xmm4, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm4
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm6[0]
; X64-NEXT: retq
@@ -978,202 +898,132 @@ define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $48, %esp
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: subl $16, %esp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bh
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dh
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ah
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
-; X86-NEXT: setg %dl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jl .LBB16_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %dl, %cl
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: .LBB16_2:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jl .LBB16_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: .LBB16_4:
+; X86-NEXT: setl %al
+; X86-NEXT: setg %bh
+; X86-NEXT: subb %al, %bh
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, %ebx
-; X86-NEXT: jl .LBB16_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: .LBB16_6:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jl .LBB16_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: .LBB16_8:
+; X86-NEXT: setl %al
+; X86-NEXT: setg %bl
+; X86-NEXT: subb %al, %bl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB16_10
-; X86-NEXT: # %bb.9:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_10:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB16_12
-; X86-NEXT: # %bb.11:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_12:
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dh
+; X86-NEXT: subb %al, %dh
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB16_14
-; X86-NEXT: # %bb.13:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_14:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB16_16
-; X86-NEXT: # %bb.15:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_16:
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB16_18
-; X86-NEXT: # %bb.17:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_18:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, (%esp) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB16_20
-; X86-NEXT: # %bb.19:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT: .LBB16_20:
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, %ebx
-; X86-NEXT: jl .LBB16_22
-; X86-NEXT: # %bb.21:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: .LBB16_22:
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jl .LBB16_24
-; X86-NEXT: # %bb.23:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: .LBB16_24:
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %ebp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %edi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
+; X86-NEXT: setl %al
; X86-NEXT: setg %ah
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: jl .LBB16_26
-; X86-NEXT: # %bb.25:
-; X86-NEXT: movb %ah, %bl
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: .LBB16_26:
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: subb %al, %ah
+; X86-NEXT: movsbl %ah, %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jl .LBB16_28
-; X86-NEXT: # %bb.27:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: .LBB16_28:
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jl .LBB16_30
-; X86-NEXT: # %bb.29:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: .LBB16_30:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: setg %cl
-; X86-NEXT: jl .LBB16_32
-; X86-NEXT: # %bb.31:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_32:
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 60(%eax)
-; X86-NEXT: movl %edi, 56(%eax)
-; X86-NEXT: movl %esi, 52(%eax)
-; X86-NEXT: movl %edx, 48(%eax)
-; X86-NEXT: movl %ebp, 44(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 40(%eax)
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 36(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 32(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 28(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 24(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 20(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: movl %esi, 56(%eax)
+; X86-NEXT: movl %edi, 52(%eax)
+; X86-NEXT: movl %ebp, 48(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ecx, 44(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 40(%eax)
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: movsbl %dh, %edx
+; X86-NEXT: movl %edx, 36(%eax)
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; X86-NEXT: movsbl %bl, %esi
+; X86-NEXT: movl %esi, 32(%eax)
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X86-NEXT: movsbl %bh, %edi
+; X86-NEXT: movl %edi, 28(%eax)
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT: movl %ebx, 24(%eax)
+; X86-NEXT: movl %edi, 20(%eax)
+; X86-NEXT: movl %esi, 16(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: addl $48, %esp
+; X86-NEXT: addl $16, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1187,121 +1037,136 @@ define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind {
; X64-LABEL: scmp_wide_vec_op:
; X64: # %bb.0:
; X64-NEXT: movq %xmm7, %rax
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovll %eax, %ecx
-; X64-NEXT: movd %ecx, %xmm8
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm8
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm7[2,3,2,3]
-; X64-NEXT: movq %xmm7, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm7
+; X64-NEXT: movq %xmm7, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm7
; X64-NEXT: punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3],xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7]
-; X64-NEXT: movq %xmm6, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm7
+; X64-NEXT: movq %xmm6, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm7
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
-; X64-NEXT: movq %xmm6, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm6
+; X64-NEXT: movq %xmm6, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm6
; X64-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
-; X64-NEXT: movq %xmm5, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm6
+; X64-NEXT: movq %xmm5, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm6
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
-; X64-NEXT: movq %xmm5, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm5
+; X64-NEXT: movq %xmm5, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm5
; X64-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; X64-NEXT: movq %xmm4, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm5
+; X64-NEXT: movq %xmm4, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm5
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
-; X64-NEXT: movq %xmm4, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm4
+; X64-NEXT: movq %xmm4, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm4
; X64-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
; X64-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
-; X64-NEXT: movq %xmm3, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm4
+; X64-NEXT: movq %xmm3, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm4
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
-; X64-NEXT: movq %xmm3, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm3
+; X64-NEXT: movq %xmm3, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; X64-NEXT: movq %xmm2, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm3
+; X64-NEXT: movq %xmm2, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X64-NEXT: movq %xmm2, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm2
+; X64-NEXT: movq %xmm2, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
-; X64-NEXT: movq %xmm1, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm2
+; X64-NEXT: movq %xmm1, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X64-NEXT: movq %xmm1, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm1
+; X64-NEXT: movq %xmm1, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; X64-NEXT: movq %xmm0, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm1
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; X64-NEXT: movq %xmm0, %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovll %eax, %edx
-; X64-NEXT: movd %edx, %xmm0
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: setl %al
+; X64-NEXT: setg %cl
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
@@ -1315,41 +1180,76 @@ define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpl %edx, %edi
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: sbbl %esi, %ebp
+; X86-NEXT: setl %al
+; X86-NEXT: cmpl %edi, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: setl %ah
+; X86-NEXT: subb %al, %ah
+; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl %ecx, %ebp
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: sbbl %edx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %edx
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %al
+; X86-NEXT: cmpl %ebp, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: setl %ah
+; X86-NEXT: subb %al, %ah
+; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl %edi, %ecx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: setl %al
+; X86-NEXT: cmpl %ecx, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: setl %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: cmpl %ebp, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: cmpl %edi, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl %edi, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb $-1, %bh
-; X86-NEXT: jl .LBB17_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %bl, %bh
-; X86-NEXT: .LBB17_2:
-; X86-NEXT: cmpl %ecx, %edx
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: setl %cl
+; X86-NEXT: subb %bl, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl %edx, %edi
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: sbbl %eax, %ecx
; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: cmpl %edi, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_4:
+; X86-NEXT: setl %bh
+; X86-NEXT: subb %bl, %bh
+; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %ecx, %edi
@@ -1359,215 +1259,138 @@ define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %bh
+; X86-NEXT: subb %bl, %bh
+; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_6:
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %bl
; X86-NEXT: cmpl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_8:
-; X86-NEXT: cmpl %edx, %eax
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %bh
+; X86-NEXT: subb %bl, %bh
+; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_10
-; X86-NEXT: # %bb.9:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_10:
-; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_12
-; X86-NEXT: # %bb.11:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_12:
-; X86-NEXT: cmpl %edx, %eax
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: setl %dl
+; X86-NEXT: subb %bl, %dl
+; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_14
-; X86-NEXT: # %bb.13:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_14:
-; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_16
-; X86-NEXT: # %bb.15:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_16:
-; X86-NEXT: cmpl %edx, %eax
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl %ecx, %esi
-; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %eax, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %dl
+; X86-NEXT: subb %bl, %dl
+; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_18
-; X86-NEXT: # %bb.17:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_18:
-; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %ecx, %esi
+; X86-NEXT: cmpl %ecx, %eax
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %ebp, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: setl %al
+; X86-NEXT: cmpl %ecx, %ebp
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: setl %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %edx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl %ebp, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_20
-; X86-NEXT: # %bb.19:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_20:
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: setl %al
+; X86-NEXT: cmpl %ecx, %ebp
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: setl %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movb %cl, (%esp) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jl .LBB17_22
-; X86-NEXT: # %bb.21:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_22:
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: setl %bl
-; X86-NEXT: cmpl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %ecx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb $-1, %cl
-; X86-NEXT: jl .LBB17_24
-; X86-NEXT: # %bb.23:
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: .LBB17_24:
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: cmpl %edi, %eax
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %ebp
+; X86-NEXT: sbbl %esi, %ebp
+; X86-NEXT: setl %dl
+; X86-NEXT: cmpl %ecx, %eax
+; X86-NEXT: sbbl %edi, %esi
; X86-NEXT: setl %ch
-; X86-NEXT: cmpl %eax, %edi
+; X86-NEXT: subb %dl, %ch
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl %edx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: sbbl %edi, %ebp
+; X86-NEXT: setl %cl
+; X86-NEXT: cmpl %esi, %edx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: setl %dl
+; X86-NEXT: subb %cl, %dl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movb $-1, %cl
-; X86-NEXT: jl .LBB17_26
-; X86-NEXT: # %bb.25:
-; X86-NEXT: movb %ch, %cl
-; X86-NEXT: .LBB17_26:
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: cmpl %edi, %esi
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: sbbl %eax, %ecx
-; X86-NEXT: setl %dh
-; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: cmpl %ebx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB17_28
-; X86-NEXT: # %bb.27:
-; X86-NEXT: movb %dh, %al
-; X86-NEXT: .LBB17_28:
-; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl %edi, %ebx
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: sbbl %esi, %ebp
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: sbbl %edi, %ebp
+; X86-NEXT: setl %dh
+; X86-NEXT: cmpl %esi, %ebx
+; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %ebx, %edi
+; X86-NEXT: subb %dh, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movb $-1, %dh
-; X86-NEXT: jl .LBB17_30
-; X86-NEXT: # %bb.29:
-; X86-NEXT: movb %cl, %dh
-; X86-NEXT: .LBB17_30:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %ebx, %ecx
-; X86-NEXT: movl %ebp, %esi
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: setl %dl
-; X86-NEXT: cmpl %ecx, %ebx
-; X86-NEXT: sbbl %ebp, %edi
-; X86-NEXT: movb $-1, %bl
-; X86-NEXT: jl .LBB17_32
-; X86-NEXT: # %bb.31:
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: .LBB17_32:
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: sbbl %edi, %ebp
+; X86-NEXT: setl %dh
+; X86-NEXT: cmpl %esi, %eax
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: subb %dh, %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %bl, 15(%eax)
-; X86-NEXT: movb %dh, 14(%eax)
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: movb %cl, 13(%eax)
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: movb %cl, 12(%eax)
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: movb %cl, 14(%eax)
+; X86-NEXT: movb %dl, 13(%eax)
+; X86-NEXT: movb %ch, 12(%eax)
+; X86-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 11(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 10(%eax)
@@ -1591,7 +1414,7 @@ define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind {
; X86-NEXT: movb %cl, 1(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, (%eax)
-; X86-NEXT: addl $16, %esp
+; X86-NEXT: addl $12, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1607,111 +1430,158 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
-; X64-NEXT: addb %r11b, %r11b
-; X64-NEXT: sarb %r11b
-; X64-NEXT: addb %dl, %dl
-; X64-NEXT: sarb %dl
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpb %r11b, %dl
-; X64-NEXT: setg %dil
-; X64-NEXT: movq $-1, %r11
-; X64-NEXT: cmovlq %r11, %rdi
-; X64-NEXT: addb %r12b, %r12b
-; X64-NEXT: sarb %r12b
-; X64-NEXT: addb %cl, %cl
-; X64-NEXT: sarb %cl
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpb %r12b, %cl
-; X64-NEXT: setg %dl
-; X64-NEXT: cmovlq %r11, %rdx
; X64-NEXT: addb %r15b, %r15b
; X64-NEXT: sarb %r15b
-; X64-NEXT: addb %r8b, %r8b
-; X64-NEXT: sarb %r8b
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: cmpb %r15b, %r8b
-; X64-NEXT: setg %cl
-; X64-NEXT: cmovlq %r11, %rcx
+; X64-NEXT: addb %sil, %sil
+; X64-NEXT: sarb %sil
+; X64-NEXT: cmpb %r15b, %sil
+; X64-NEXT: setl %sil
+; X64-NEXT: setg %r15b
+; X64-NEXT: subb %sil, %r15b
+; X64-NEXT: movsbq %r15b, %rsi
+; X64-NEXT: movq %rsi, (%rax)
+; X64-NEXT: movq %rsi, %xmm0
+; X64-NEXT: sarq $63, %rsi
; X64-NEXT: addb %r14b, %r14b
; X64-NEXT: sarb %r14b
-; X64-NEXT: addb %r9b, %r9b
-; X64-NEXT: sarb %r9b
-; X64-NEXT: xorl %r8d, %r8d
-; X64-NEXT: cmpb %r14b, %r9b
-; X64-NEXT: setg %r8b
-; X64-NEXT: cmovlq %r11, %r8
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; X64-NEXT: addb %r15b, %r15b
+; X64-NEXT: sarb %r15b
+; X64-NEXT: cmpb %r14b, %r15b
+; X64-NEXT: setl %r14b
+; X64-NEXT: setg %r15b
+; X64-NEXT: subb %r14b, %r15b
+; X64-NEXT: movsbq %r15b, %r14
+; X64-NEXT: movq %r14, %r15
+; X64-NEXT: sarq $63, %r15
; X64-NEXT: addb %bpl, %bpl
; X64-NEXT: sarb %bpl
-; X64-NEXT: addb %sil, %sil
-; X64-NEXT: sarb %sil
-; X64-NEXT: xorl %r9d, %r9d
-; X64-NEXT: cmpb %bpl, %sil
-; X64-NEXT: setg %r9b
-; X64-NEXT: cmovlq %r11, %r9
+; X64-NEXT: addb %dl, %dl
+; X64-NEXT: sarb %dl
+; X64-NEXT: cmpb %bpl, %dl
+; X64-NEXT: setl %dl
+; X64-NEXT: setg %bpl
+; X64-NEXT: subb %dl, %bpl
+; X64-NEXT: movsbq %bpl, %rdx
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: sarq $63, %r12
; X64-NEXT: addb %bl, %bl
; X64-NEXT: sarb %bl
+; X64-NEXT: addb %cl, %cl
+; X64-NEXT: sarb %cl
+; X64-NEXT: cmpb %bl, %cl
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %bl
+; X64-NEXT: subb %cl, %bl
+; X64-NEXT: movsbq %bl, %rbx
+; X64-NEXT: movq %rbx, %rcx
+; X64-NEXT: sarq $63, %rcx
+; X64-NEXT: addb %r11b, %r11b
+; X64-NEXT: sarb %r11b
+; X64-NEXT: addb %r8b, %r8b
+; X64-NEXT: sarb %r8b
+; X64-NEXT: cmpb %r11b, %r8b
+; X64-NEXT: setl %r8b
+; X64-NEXT: setg %r11b
+; X64-NEXT: subb %r8b, %r11b
+; X64-NEXT: movsbq %r11b, %r8
+; X64-NEXT: movq %r8, %r11
+; X64-NEXT: sarq $63, %r11
+; X64-NEXT: addb %r10b, %r10b
+; X64-NEXT: sarb %r10b
+; X64-NEXT: addb %r9b, %r9b
+; X64-NEXT: sarb %r9b
+; X64-NEXT: cmpb %r10b, %r9b
+; X64-NEXT: setl %r9b
+; X64-NEXT: setg %r10b
+; X64-NEXT: subb %r9b, %r10b
+; X64-NEXT: movsbq %r10b, %r9
+; X64-NEXT: movq %r9, %r10
+; X64-NEXT: sarq $63, %r10
+; X64-NEXT: addb %dil, %dil
+; X64-NEXT: sarb %dil
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; X64-NEXT: addb %bpl, %bpl
; X64-NEXT: sarb %bpl
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpb %bl, %bpl
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovlq %r11, %rsi
-; X64-NEXT: addb %r10b, %r10b
-; X64-NEXT: sarb %r10b
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
-; X64-NEXT: addb %bl, %bl
-; X64-NEXT: sarb %bl
-; X64-NEXT: xorl %r14d, %r14d
-; X64-NEXT: cmpb %r10b, %bl
-; X64-NEXT: setg %r14b
-; X64-NEXT: cmovlq %r11, %r14
-; X64-NEXT: movq %r14, %r10
-; X64-NEXT: shrq $2, %r10
-; X64-NEXT: movq %r10, 88(%rax)
-; X64-NEXT: movq %rsi, %r10
-; X64-NEXT: shlq $9, %r10
+; X64-NEXT: cmpb %dil, %bpl
+; X64-NEXT: setl %dil
+; X64-NEXT: setg %bpl
+; X64-NEXT: subb %dil, %bpl
+; X64-NEXT: movsbq %bpl, %r13
+; X64-NEXT: movq %r13, %rbp
+; X64-NEXT: sarq $63, %rbp
+; X64-NEXT: movq %rbp, %rdi
+; X64-NEXT: shldq $62, %r13, %rdi
+; X64-NEXT: movq %rdi, 88(%rax)
+; X64-NEXT: shrq $2, %rbp
+; X64-NEXT: movl %ebp, 96(%rax)
+; X64-NEXT: movq %r10, %rdi
+; X64-NEXT: shldq $20, %r9, %rdi
+; X64-NEXT: movq %rdi, 64(%rax)
+; X64-NEXT: movq %r11, %rdi
+; X64-NEXT: shldq $31, %r8, %rdi
+; X64-NEXT: movq %rdi, 48(%rax)
+; X64-NEXT: movq %rcx, %rdi
+; X64-NEXT: shldq $42, %rbx, %rdi
+; X64-NEXT: movq %rdi, 32(%rax)
+; X64-NEXT: movabsq $9007199254738944, %rdi # imm = 0x1FFFFFFFFFF800
+; X64-NEXT: andq %r12, %rdi
+; X64-NEXT: shldq $53, %rdx, %r12
+; X64-NEXT: movq %r12, 16(%rax)
+; X64-NEXT: movabsq $9007199254740991, %r12 # imm = 0x1FFFFFFFFFFFFF
+; X64-NEXT: andq %r12, %r15
+; X64-NEXT: shldq $9, %r14, %r15
+; X64-NEXT: shlq $62, %r13
+; X64-NEXT: orq %r15, %r13
+; X64-NEXT: movq %r13, 80(%rax)
+; X64-NEXT: movabsq $2251799813685247, %r15 # imm = 0x7FFFFFFFFFFFF
+; X64-NEXT: andq %rbp, %r15
+; X64-NEXT: movq %r15, %r13
+; X64-NEXT: shrq $48, %r13
+; X64-NEXT: movb %r13b, 102(%rax)
+; X64-NEXT: shrq $32, %r15
+; X64-NEXT: movw %r15w, 100(%rax)
+; X64-NEXT: shlq $42, %rbx
+; X64-NEXT: shrq $11, %rdi
+; X64-NEXT: orq %rbx, %rdi
+; X64-NEXT: movq %rdi, 24(%rax)
+; X64-NEXT: shlq $9, %r14
+; X64-NEXT: shrq $44, %r10
+; X64-NEXT: andl $511, %r10d # imm = 0x1FF
+; X64-NEXT: orq %r14, %r10
; X64-NEXT: movq %r10, 72(%rax)
-; X64-NEXT: movq %r9, (%rax)
-; X64-NEXT: shlq $62, %r14
-; X64-NEXT: shrq $55, %rsi
-; X64-NEXT: orq %r14, %rsi
-; X64-NEXT: movq %rsi, 80(%rax)
-; X64-NEXT: movq %r8, %rsi
-; X64-NEXT: shrq $44, %rsi
-; X64-NEXT: movq %rsi, 64(%rax)
-; X64-NEXT: shlq $20, %r8
-; X64-NEXT: movq %r8, 56(%rax)
-; X64-NEXT: movq %rcx, %rsi
-; X64-NEXT: shrq $33, %rsi
-; X64-NEXT: movq %rsi, 48(%rax)
-; X64-NEXT: shlq $31, %rcx
-; X64-NEXT: movq %rcx, 40(%rax)
-; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: shlq $20, %r9
+; X64-NEXT: shrq $33, %r11
+; X64-NEXT: andl $1048575, %r11d # imm = 0xFFFFF
+; X64-NEXT: orq %r9, %r11
+; X64-NEXT: movq %r11, 56(%rax)
+; X64-NEXT: shlq $31, %r8
; X64-NEXT: shrq $22, %rcx
-; X64-NEXT: movq %rcx, 32(%rax)
-; X64-NEXT: shlq $42, %rdx
-; X64-NEXT: movq %rdx, 24(%rax)
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: shrq $11, %rcx
-; X64-NEXT: movq %rcx, 16(%rax)
-; X64-NEXT: shlq $53, %rdi
-; X64-NEXT: movq %rdi, 8(%rax)
-; X64-NEXT: movb $0, 102(%rax)
-; X64-NEXT: movw $0, 100(%rax)
-; X64-NEXT: movl $0, 96(%rax)
+; X64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X64-NEXT: orq %r8, %rcx
+; X64-NEXT: movq %rcx, 40(%rax)
+; X64-NEXT: movq %rsi, %xmm1
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X64-NEXT: movq %xmm0, %rcx
+; X64-NEXT: andq %r12, %rcx
+; X64-NEXT: shlq $53, %rdx
+; X64-NEXT: orq %rcx, %rdx
+; X64-NEXT: movq %rdx, 8(%rax)
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
@@ -1723,203 +1593,200 @@ define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $44, %esp
+; X86-NEXT: subl $52, %esp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addb %al, %al
+; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb %al, (%esp) # 1-byte Spill
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: addb %cl, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
-; X86-NEXT: addb %dh, %dh
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NEXT: addb %dl, %dl
-; X86-NEXT: sarb %dl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: addb %ch, %ch
-; X86-NEXT: sarb %ch
+; X86-NEXT: addb %al, %al
+; X86-NEXT: sarb %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: sarb %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
-; X86-NEXT: addb %ah, %ah
-; X86-NEXT: sarb %ah
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb %al, %ah
-; X86-NEXT: setg %al
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jl .LBB18_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: .LBB18_2:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addb %al, %al
+; X86-NEXT: sarb %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
+; X86-NEXT: sarb %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
-; X86-NEXT: movb %al, (%esp) # 1-byte Spill
+; X86-NEXT: sarb %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: addb %dl, %dl
+; X86-NEXT: sarb %dl
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT: addb %ah, %ah
+; X86-NEXT: sarb %ah
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: addb %cl, %cl
; X86-NEXT: sarb %cl
-; X86-NEXT: sarb %dh
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb %dl, %ch
-; X86-NEXT: setg %dl
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB18_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %dl, %al
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: .LBB18_4:
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NEXT: addb %bl, %bl
-; X86-NEXT: addb %bh, %bh
-; X86-NEXT: sarb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: sarb (%esp) # 1-byte Folded Spill
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb %cl, %dh
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB18_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %cl, %al
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: .LBB18_6:
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
; X86-NEXT: addb %ch, %ch
-; X86-NEXT: addb %dl, %dl
+; X86-NEXT: sarb %ch
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: addb %bl, %bl
; X86-NEXT: sarb %bl
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT: addb %bh, %bh
; X86-NEXT: sarb %bh
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: movb (%esp), %bl # 1-byte Reload
-; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
-; X86-NEXT: setg %bl
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jl .LBB18_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %bl, %al
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: .LBB18_8:
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addb %cl, %cl
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: addb %al, %al
+; X86-NEXT: sarb %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
; X86-NEXT: addb %dh, %dh
-; X86-NEXT: sarb %ch
-; X86-NEXT: sarb %dl
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
-; X86-NEXT: setg %bl
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jl .LBB18_10
-; X86-NEXT: # %bb.9:
-; X86-NEXT: movb %bl, %al
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: xorl %ebp, %ebp
-; X86-NEXT: .LBB18_10:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sarb %cl
; X86-NEXT: sarb %dh
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb %ch, %dl
+; X86-NEXT: cmpb %al, %dh
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dh
+; X86-NEXT: subb %al, %dh
+; X86-NEXT: movsbl %dh, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb %bl, %bh
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dh
+; X86-NEXT: subb %al, %dh
+; X86-NEXT: movsbl %dh, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb %cl, %ch
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movsbl %cl, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ecx, (%ebp)
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl $2097151, %esi # imm = 0x1FFFFF
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb %dl, %ah
+; X86-NEXT: setl %al
; X86-NEXT: setg %dl
-; X86-NEXT: movl $-1, (%esp) # 4-byte Folded Spill
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jl .LBB18_12
-; X86-NEXT: # %bb.11:
-; X86-NEXT: movb %dl, %bl
-; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: .LBB18_12:
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb %cl, %dh
-; X86-NEXT: setg %cl
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: jl .LBB18_14
-; X86-NEXT: # %bb.13:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: .LBB18_14:
-; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
+; X86-NEXT: setl %dl
+; X86-NEXT: setg %dh
+; X86-NEXT: subb %dl, %dh
+; X86-NEXT: movsbl %dh, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, 96(%ebp)
+; X86-NEXT: movl %edx, 92(%ebp)
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, 80(%ebp)
+; X86-NEXT: movl %eax, 68(%ebp)
+; X86-NEXT: movl %eax, 64(%ebp)
+; X86-NEXT: movl %esi, 52(%ebp)
+; X86-NEXT: movl %esi, 48(%ebp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, 36(%ebp)
+; X86-NEXT: movl %edi, 24(%ebp)
+; X86-NEXT: movl %edi, 20(%ebp)
+; X86-NEXT: movl %ecx, 8(%ebp)
+; X86-NEXT: movl %ecx, 4(%ebp)
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $30, %edx, %ecx
+; X86-NEXT: movl %ecx, 88(%ebp)
+; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: shldl $9, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: shldl $9, %ebp, %ecx
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: movl %ecx, 76(%ebx)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $20, %ebx, %ecx
+; X86-NEXT: movl %ecx, 60(%ebp)
; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: shrl $2, %ecx
-; X86-NEXT: movl %ecx, 92(%eax)
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: shrl $23, %ecx
-; X86-NEXT: movl %ecx, 80(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: shrl $12, %ecx
-; X86-NEXT: movl %ecx, 64(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: shrl %ecx
-; X86-NEXT: movl %ecx, 48(%eax)
+; X86-NEXT: shldl $31, %ebx, %ecx
+; X86-NEXT: movl %ecx, 44(%ebp)
+; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shrl $22, %ecx
-; X86-NEXT: movl %ecx, 36(%eax)
+; X86-NEXT: shldl $10, %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: shldl $10, %ebp, %ecx
+; X86-NEXT: movl %ecx, 32(%ebx)
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: shldl $21, %ebp, %ecx
+; X86-NEXT: movl %ecx, 16(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: shrl $11, %ecx
-; X86-NEXT: movl %ecx, 20(%eax)
+; X86-NEXT: shrl $2, %ecx
+; X86-NEXT: movw %cx, 100(%ebx)
+; X86-NEXT: shll $21, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT: movl %ebp, 12(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X86-NEXT: shldl $30, %ecx, %esi
-; X86-NEXT: movl %esi, 88(%eax)
; X86-NEXT: shll $30, %ecx
-; X86-NEXT: movl %ecx, 84(%eax)
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, 84(%ebx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $9, %ecx, %ebp
-; X86-NEXT: movl %ebp, 76(%eax)
; X86-NEXT: shll $9, %ecx
-; X86-NEXT: movl %ecx, 72(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $20, %ecx, %edi
-; X86-NEXT: movl %edi, 60(%eax)
-; X86-NEXT: shll $20, %ecx
-; X86-NEXT: movl %ecx, 56(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $31, %ecx, %ebx
-; X86-NEXT: movl %ebx, 44(%eax)
-; X86-NEXT: shll $31, %ecx
-; X86-NEXT: movl %ecx, 40(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: shldl $10, %ecx, %esi
-; X86-NEXT: movl %esi, 32(%eax)
-; X86-NEXT: shll $10, %ecx
-; X86-NEXT: movl %ecx, 28(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $21, %ecx, %edx
-; X86-NEXT: movl %edx, 16(%eax)
-; X86-NEXT: shll $21, %ecx
-; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movb $0, 102(%eax)
-; X86-NEXT: movw $0, 100(%eax)
-; X86-NEXT: movl $0, 96(%eax)
-; X86-NEXT: movl $0, 68(%eax)
-; X86-NEXT: movl $0, 52(%eax)
-; X86-NEXT: movl $0, 24(%eax)
-; X86-NEXT: movl $0, 8(%eax)
-; X86-NEXT: addl $44, %esp
+; X86-NEXT: shrl $12, %eax
+; X86-NEXT: andl $511, %eax # imm = 0x1FF
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %eax, 72(%ebx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $20, %eax
+; X86-NEXT: shrl %esi
+; X86-NEXT: andl $1048575, %esi # imm = 0xFFFFF
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl %esi, 56(%ebx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, 40(%ebx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $10, %eax
+; X86-NEXT: shrl $11, %edi
+; X86-NEXT: andl $1023, %edi # imm = 0x3FF
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl %edi, 28(%ebx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: shrl $18, %eax
+; X86-NEXT: andl $7, %eax
+; X86-NEXT: movb %al, 102(%ebx)
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: addl $52, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1936,39 +1803,36 @@ define <1 x i3> @scmp_scalarize(<1 x i33> %x, <1 x i33> %y) nounwind {
; X64-NEXT: sarq $31, %rsi
; X64-NEXT: shlq $31, %rdi
; X64-NEXT: sarq $31, %rdi
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_scalarize:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: andl $1, %edi
-; X86-NEXT: negl %edi
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setl %dl
-; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB19_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB19_2:
+; X86-NEXT: andl $1, %esi
+; X86-NEXT: negl %esi
+; X86-NEXT: cmpl %ecx, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call <1 x i3> @llvm.scmp(<1 x i33> %x, <1 x i33> %y)
ret <1 x i3> %1
@@ -1981,29 +1845,29 @@ define <2 x i8> @scmp_bool_operands(<2 x i1> %x, <2 x i1> %y) nounwind {
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: andb $1, %cl
-; X64-NEXT: negb %cl
-; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
-; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
-; X64-NEXT: andb $1, %sil
-; X64-NEXT: negb %sil
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpb %cl, %sil
-; X64-NEXT: setg %dil
-; X64-NEXT: movl $255, %ecx
-; X64-NEXT: cmovll %ecx, %edi
-; X64-NEXT: shll $8, %edi
; X64-NEXT: andb $1, %al
; X64-NEXT: negb %al
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; X64-NEXT: andb $1, %dl
; X64-NEXT: negb %dl
-; X64-NEXT: xorl %esi, %esi
; X64-NEXT: cmpb %al, %dl
-; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %ecx, %esi
-; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: orl %edi, %eax
-; X64-NEXT: movd %eax, %xmm0
+; X64-NEXT: setl %al
+; X64-NEXT: setg %dl
+; X64-NEXT: subb %al, %dl
+; X64-NEXT: movzbl %dl, %eax
+; X64-NEXT: andb $1, %cl
+; X64-NEXT: negb %cl
+; X64-NEXT: andb $1, %sil
+; X64-NEXT: negb %sil
+; X64-NEXT: cmpb %cl, %sil
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %dl
+; X64-NEXT: subb %cl, %dl
+; X64-NEXT: movzbl %dl, %ecx
+; X64-NEXT: shll $8, %ecx
+; X64-NEXT: orl %eax, %ecx
+; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: scmp_bool_operands:
@@ -2011,29 +1875,23 @@ define <2 x i8> @scmp_bool_operands(<2 x i1> %x, <2 x i1> %y) nounwind {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $1, %cl
; X86-NEXT: negb %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
-; X86-NEXT: andb $1, %ah
-; X86-NEXT: negb %ah
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: andb $1, %al
-; X86-NEXT: negb %al
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: andb $1, %dl
; X86-NEXT: negb %dl
-; X86-NEXT: cmpb %al, %dl
-; X86-NEXT: setg %ch
-; X86-NEXT: movb $-1, %dl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB20_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %ch, %al
-; X86-NEXT: .LBB20_2:
-; X86-NEXT: cmpb %cl, %ah
-; X86-NEXT: setg %cl
-; X86-NEXT: jl .LBB20_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: .LBB20_4:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andb $1, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT: andb $1, %ah
+; X86-NEXT: negb %ah
+; X86-NEXT: cmpb %al, %ah
+; X86-NEXT: setl %ah
+; X86-NEXT: setg %al
+; X86-NEXT: subb %ah, %al
+; X86-NEXT: cmpb %cl, %dl
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %cl, %dl
; X86-NEXT: retl
%1 = call <2 x i8> @llvm.scmp(<2 x i1> %x, <2 x i1> %y)
ret <2 x i8> %1
@@ -2048,45 +1906,36 @@ define <2 x i16> @scmp_ret_wider_than_operands(<2 x i8> %x, <2 x i8> %y) nounwin
; X64-NEXT: movd %xmm0, %edx
; X64-NEXT: movl %edx, %esi
; X64-NEXT: shrl $8, %esi
-; X64-NEXT: xorl %edi, %edi
; X64-NEXT: cmpb %cl, %sil
-; X64-NEXT: setg %dil
-; X64-NEXT: movl $65535, %ecx # imm = 0xFFFF
-; X64-NEXT: cmovll %ecx, %edi
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpb %al, %dl
+; X64-NEXT: setl %cl
; X64-NEXT: setg %sil
-; X64-NEXT: cmovll %ecx, %esi
-; X64-NEXT: movd %esi, %xmm0
-; X64-NEXT: pinsrw $1, %edi, %xmm0
+; X64-NEXT: subb %cl, %sil
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: cmpb %al, %dl
+; X64-NEXT: setl %al
+; X64-NEXT: setg %dl
+; X64-NEXT: subb %al, %dl
+; X64-NEXT: movsbl %dl, %eax
+; X64-NEXT: movd %eax, %xmm0
+; X64-NEXT: pinsrw $1, %ecx, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: scmp_ret_wider_than_operands:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %ch
-; X86-NEXT: movl $65535, %edx # imm = 0xFFFF
-; X86-NEXT: movl $65535, %eax # imm = 0xFFFF
-; X86-NEXT: jl .LBB21_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %ch, %bl
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: .LBB21_2:
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: setl %al
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %al, %dl
+; X86-NEXT: movsbl %dl, %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: setg %cl
-; X86-NEXT: jl .LBB21_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: .LBB21_4:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %cl, %dl
+; X86-NEXT: movsbl %dl, %edx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: # kill: def $dx killed $dx killed $edx
-; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call <2 x i16> @llvm.scmp(<2 x i8> %x, <2 x i8> %y)
ret <2 x i16> %1
diff --git a/llvm/test/CodeGen/X86/ucmp.ll b/llvm/test/CodeGen/X86/ucmp.ll
index 344404749d7ef..ac35605be4d52 100644
--- a/llvm/test/CodeGen/X86/ucmp.ll
+++ b/llvm/test/CodeGen/X86/ucmp.ll
@@ -5,24 +5,17 @@
define i8 @ucmp.8.8(i8 %x, i8 %y) nounwind {
; X64-LABEL: ucmp.8.8:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpb %sil, %dil
-; X64-NEXT: seta %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
; X64-NEXT: retq
;
; X86-LABEL: ucmp.8.8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB0_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB0_2:
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
; X86-NEXT: retl
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -31,24 +24,17 @@ define i8 @ucmp.8.8(i8 %x, i8 %y) nounwind {
define i8 @ucmp.8.16(i16 %x, i16 %y) nounwind {
; X64-LABEL: ucmp.8.16:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpw %si, %di
-; X64-NEXT: seta %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
; X64-NEXT: retq
;
; X86-LABEL: ucmp.8.16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
; X86-NEXT: retl
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -57,24 +43,17 @@ define i8 @ucmp.8.16(i16 %x, i16 %y) nounwind {
define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
; X64-LABEL: ucmp.8.32:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: seta %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
; X64-NEXT: retq
;
; X86-LABEL: ucmp.8.32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB2_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB2_2:
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
; X86-NEXT: retl
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -83,33 +62,26 @@ define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: ucmp.8.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: seta %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
; X64-NEXT: retq
;
; X86-LABEL: ucmp.8.64:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl %esi, %eax
+; X86-NEXT: cmpl %ecx, %esi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: setb %al
+; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB3_2:
+; X86-NEXT: sbbb $0, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
@@ -124,12 +96,9 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: sbbq %rsi, %rax
; X64-NEXT: setb %al
-; X64-NEXT: movzbl %al, %r8d
; X64-NEXT: cmpq %rdx, %rdi
; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %r8d, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: sbbb $0, %al
; X64-NEXT: retq
;
; X86-LABEL: ucmp.8.128:
@@ -138,30 +107,26 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebp, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sbbl %eax, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl %ebp, %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB4_2
-; X86-NEXT: # %bb.1:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB4_2:
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: sbbl %ecx, %eax
+; X86-NEXT: setb %al
+; X86-NEXT: cmpl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %ebp, %esi
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: sbbb $0, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -174,25 +139,19 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
; X64-LABEL: ucmp.32.32:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: seta %cl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
; X64-NEXT: retq
;
; X86-LABEL: ucmp.32.32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: seta %dl
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jb .LBB5_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %dl, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB5_2:
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %eax
; X86-NEXT: retl
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
@@ -201,34 +160,32 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: ucmp.32.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: seta %cl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
; X64-NEXT: retq
;
; X86-LABEL: ucmp.32.64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jb .LBB6_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: .LBB6_2:
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movsbl %bl, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
ret i32 %1
@@ -237,36 +194,34 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: ucmp.64.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: seta %cl
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: cmovaeq %rcx, %rax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbq %al, %rax
; X64-NEXT: retq
;
; X86-LABEL: ucmp.64.64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: jb .LBB7_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: .LBB7_2:
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movsbl %bl, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
ret i64 %1
@@ -275,24 +230,17 @@ define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
define i4 @ucmp_narrow_result(i32 %x, i32 %y) nounwind {
; X64-LABEL: ucmp_narrow_result:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: seta %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
; X64-NEXT: retq
;
; X86-LABEL: ucmp_narrow_result:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB8_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB8_2:
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
; X86-NEXT: retl
%1 = call i4 @llvm.ucmp(i32 %x, i32 %y)
ret i4 %1
@@ -304,35 +252,28 @@ define i8 @ucmp_narrow_op(i62 %x, i62 %y) nounwind {
; X64-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
; X64-NEXT: andq %rax, %rsi
; X64-NEXT: andq %rax, %rdi
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: seta %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
; X64-NEXT: retq
;
; X86-LABEL: ucmp_narrow_op:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl $1073741823, %eax # imm = 0x3FFFFFFF
+; X86-NEXT: movl $1073741823, %ecx # imm = 0x3FFFFFFF
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: andl %eax, %edx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: cmpl %esi, %edi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl %edi, %esi
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB9_2
-; X86-NEXT: # %bb.1:
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB9_2:
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: setb %al
+; X86-NEXT: cmpl %edi, %esi
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbb $0, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
@@ -343,39 +284,31 @@ define i8 @ucmp_narrow_op(i62 %x, i62 %y) nounwind {
define i141 @ucmp_wide_result(i32 %x, i32 %y) nounwind {
; X64-LABEL: ucmp_wide_result:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: seta %cl
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: cmovaeq %rcx, %rax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbq %al, %rax
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: sarq $63, %rdx
+; X64-NEXT: movl %edx, %ecx
+; X64-NEXT: andl $8191, %ecx # imm = 0x1FFF
; X64-NEXT: retq
;
; X86-LABEL: ucmp_wide_result:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: seta %bl
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jb .LBB10_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %bl, %dl
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: .LBB10_2:
-; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: seta %cl
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movsbl %cl, %ecx
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %esi, (%eax)
-; X86-NEXT: movl $0, 12(%eax)
-; X86-NEXT: movl $0, 8(%eax)
-; X86-NEXT: movw $0, 16(%eax)
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %ebx
+; X86-NEXT: andl $8191, %ecx # imm = 0x1FFF
+; X86-NEXT: movw %cx, 16(%eax)
; X86-NEXT: retl $4
%1 = call i141 @llvm.ucmp(i32 %x, i32 %y)
ret i141 %1
@@ -391,12 +324,9 @@ define i8 @ucmp_wide_op(i109 %x, i109 %y) nounwind {
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: sbbq %rsi, %rax
; X64-NEXT: setb %al
-; X64-NEXT: movzbl %al, %r8d
; X64-NEXT: cmpq %rdx, %rdi
; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %r8d, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: sbbb $0, %al
; X64-NEXT: retq
;
; X86-LABEL: ucmp_wide_op:
@@ -405,33 +335,27 @@ define i8 @ucmp_wide_op(i109 %x, i109 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
-; X86-NEXT: movl $8191, %eax # imm = 0x1FFF
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andl %eax, %ecx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $8191, %ecx # imm = 0x1FFF
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: sbbl %ecx, %esi
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: setb %al
; X86-NEXT: cmpl %ebp, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: sbbl %eax, %ecx
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB11_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X86-NEXT: .LBB11_2:
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: sbbb $0, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -446,32 +370,24 @@ define i41 @ucmp_uncommon_types(i7 %x, i7 %y) nounwind {
; X64: # %bb.0:
; X64-NEXT: andb $127, %sil
; X64-NEXT: andb $127, %dil
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpb %sil, %dil
-; X64-NEXT: seta %cl
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: cmovaeq %rcx, %rax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbq %al, %rax
; X64-NEXT: retq
;
; X86-LABEL: ucmp_uncommon_types:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $127, %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
-; X86-NEXT: andb $127, %ah
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpb %al, %ah
-; X86-NEXT: seta %bl
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jb .LBB12_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %bl, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB12_2:
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: popl %ebx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $127, %cl
+; X86-NEXT: cmpb %al, %cl
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: retl
%1 = call i41 @llvm.ucmp(i7 %x, i7 %y)
ret i41 %1
@@ -484,38 +400,37 @@ define <4 x i32> @ucmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movd %xmm2, %eax
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
; X64-NEXT: movd %xmm2, %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: seta %dl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovbl %eax, %edx
-; X64-NEXT: movd %edx, %xmm2
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
-; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: movd %xmm3, %eax
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
-; X64-NEXT: movd %xmm3, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; X64-NEXT: movd %xmm1, %ecx
-; X64-NEXT: movd %xmm0, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %xmm1, %eax
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: movd %xmm1, %eax
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X64-NEXT: movd %xmm0, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X64-NEXT: movdqa %xmm2, %xmm0
@@ -523,59 +438,37 @@ define <4 x i32> @ucmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; X86-LABEL: ucmp_normal_vectors:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jb .LBB13_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: .LBB13_2:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jb .LBB13_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: .LBB13_4:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: seta %dl
+; X86-NEXT: sbbb $0, %dl
+; X86-NEXT: movsbl %dl, %edx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: seta %cl
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jb .LBB13_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: .LBB13_6:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: seta %bl
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movsbl %bl, %edi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: seta %bl
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movsbl %bl, %esi
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: seta %cl
-; X86-NEXT: jb .LBB13_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: .LBB13_8:
-; X86-NEXT: movl %edx, 12(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movsbl %cl, %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%1 = call <4 x i32> @llvm.ucmp(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %1
@@ -586,45 +479,41 @@ define <4 x i8> @ucmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64: # %bb.0:
; X64-NEXT: movd %xmm1, %eax
; X64-NEXT: movd %xmm0, %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: seta %dl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovbl %eax, %edx
-; X64-NEXT: movzbl %dl, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
-; X64-NEXT: movd %xmm2, %edx
+; X64-NEXT: movd %xmm2, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
-; X64-NEXT: movd %xmm2, %esi
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpl %edx, %esi
-; X64-NEXT: seta %dil
-; X64-NEXT: cmovbl %eax, %edi
-; X64-NEXT: movzbl %dil, %edx
-; X64-NEXT: shll $8, %edx
-; X64-NEXT: orl %ecx, %edx
+; X64-NEXT: movd %xmm2, %edx
+; X64-NEXT: cmpl %ecx, %edx
+; X64-NEXT: seta %cl
+; X64-NEXT: sbbb $0, %cl
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: shll $8, %ecx
+; X64-NEXT: orl %eax, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: movd %xmm2, %eax
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; X64-NEXT: movd %xmm2, %esi
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpl %ecx, %esi
-; X64-NEXT: seta %dil
-; X64-NEXT: cmovbl %eax, %edi
-; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: shll $16, %ecx
-; X64-NEXT: orl %edx, %ecx
+; X64-NEXT: movd %xmm2, %edx
+; X64-NEXT: cmpl %eax, %edx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: shll $16, %eax
+; X64-NEXT: orl %ecx, %eax
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; X64-NEXT: movd %xmm1, %edx
+; X64-NEXT: movd %xmm1, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; X64-NEXT: movd %xmm0, %esi
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpl %edx, %esi
-; X64-NEXT: seta %dil
-; X64-NEXT: cmovbl %eax, %edi
-; X64-NEXT: shll $24, %edi
-; X64-NEXT: orl %ecx, %edi
-; X64-NEXT: movd %edi, %xmm0
+; X64-NEXT: movd %xmm0, %edx
+; X64-NEXT: cmpl %ecx, %edx
+; X64-NEXT: seta %cl
+; X64-NEXT: sbbb $0, %cl
+; X64-NEXT: movzbl %cl, %ecx
+; X64-NEXT: shll $24, %ecx
+; X64-NEXT: orl %eax, %ecx
+; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: ucmp_narrow_vec_result:
@@ -633,40 +522,24 @@ define <4 x i8> @ucmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: seta %cl
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
; X86-NEXT: seta %ch
-; X86-NEXT: movb $-1, %dl
-; X86-NEXT: movb $-1, %cl
-; X86-NEXT: jb .LBB14_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %ch, %cl
-; X86-NEXT: .LBB14_2:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: seta %al
-; X86-NEXT: movb $-1, %ch
-; X86-NEXT: jb .LBB14_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %al, %ch
-; X86-NEXT: .LBB14_4:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: sbbb $0, %ch
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: seta %bl
-; X86-NEXT: movb $-1, %dh
-; X86-NEXT: jb .LBB14_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %bl, %dh
-; X86-NEXT: .LBB14_6:
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: seta %bl
-; X86-NEXT: jb .LBB14_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %bl, %dl
-; X86-NEXT: .LBB14_8:
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: seta %dl
+; X86-NEXT: sbbb $0, %dl
; X86-NEXT: movb %dl, 3(%eax)
-; X86-NEXT: movb %dh, 2(%eax)
+; X86-NEXT: movb %bl, 2(%eax)
; X86-NEXT: movb %ch, 1(%eax)
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: popl %esi
@@ -682,105 +555,82 @@ define <4 x i32> @ucmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind {
; X64: # %bb.0:
; X64-NEXT: pxor %xmm2, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X64-NEXT: pextrw $0, %xmm1, %ecx
+; X64-NEXT: pextrw $0, %xmm1, %eax
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
-; X64-NEXT: movd %xmm3, %eax
+; X64-NEXT: movd %xmm3, %ecx
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; X64-NEXT: pextrw $0, %xmm0, %edx
; X64-NEXT: movdqa %xmm0, %xmm3
; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,3,3,3]
; X64-NEXT: movd %xmm0, %esi
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpl %eax, %esi
-; X64-NEXT: seta %dil
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovbl %eax, %edi
+; X64-NEXT: cmpl %ecx, %esi
+; X64-NEXT: seta %cl
+; X64-NEXT: sbbb $0, %cl
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-NEXT: movd %xmm0, %esi
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
-; X64-NEXT: movd %xmm0, %r8d
-; X64-NEXT: xorl %r9d, %r9d
-; X64-NEXT: cmpl %esi, %r8d
-; X64-NEXT: movd %edi, %xmm0
-; X64-NEXT: seta %r9b
-; X64-NEXT: cmovbl %eax, %r9d
-; X64-NEXT: movd %r9d, %xmm2
+; X64-NEXT: movd %xmm0, %edi
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: movsbl %cl, %ecx
+; X64-NEXT: movd %ecx, %xmm0
+; X64-NEXT: seta %cl
+; X64-NEXT: sbbb $0, %cl
+; X64-NEXT: movsbl %cl, %ecx
+; X64-NEXT: movd %ecx, %xmm2
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: cmpl %eax, %edx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: movd %xmm1, %eax
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
-; X64-NEXT: movd %xmm1, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
;
; X86-LABEL: ucmp_narrow_vec_op:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jb .LBB15_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: .LBB15_2:
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: seta %dl
+; X86-NEXT: sbbb $0, %dl
+; X86-NEXT: movsbl %dl, %edx
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl
; X86-NEXT: seta %bl
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jb .LBB15_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %bl, %al
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: .LBB15_4:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movsbl %bl, %esi
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: seta %cl
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jb .LBB15_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: .LBB15_6:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: seta %ch
+; X86-NEXT: sbbb $0, %ch
+; X86-NEXT: movsbl %ch, %edi
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
; X86-NEXT: seta %cl
-; X86-NEXT: jb .LBB15_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: .LBB15_8:
-; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movsbl %cl, %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl %edi, 8(%eax)
; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%1 = call <4 x i32> @llvm.ucmp(<4 x i8> %x, <4 x i8> %y)
ret <4 x i32> %1
@@ -798,178 +648,175 @@ define <16 x i32> @ucmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
; X64-NEXT: pxor %xmm2, %xmm2
; X64-NEXT: movdqa %xmm1, %xmm4
; X64-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; X64-NEXT: pextrw $0, %xmm4, %edi
+; X64-NEXT: pextrw $0, %xmm4, %edx
; X64-NEXT: movdqa %xmm4, %xmm3
-; X64-NEXT: pextrw $4, %xmm4, %r11d
+; X64-NEXT: pextrw $4, %xmm4, %r9d
; X64-NEXT: movdqa %xmm4, %xmm5
; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[3,3,3,3]
; X64-NEXT: movd %xmm4, %eax
; X64-NEXT: movdqa %xmm0, %xmm6
; X64-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
-; X64-NEXT: pextrw $0, %xmm6, %r8d
+; X64-NEXT: pextrw $0, %xmm6, %esi
; X64-NEXT: movdqa %xmm6, %xmm4
-; X64-NEXT: pextrw $4, %xmm6, %ebx
+; X64-NEXT: pextrw $4, %xmm6, %r10d
; X64-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3]
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[3,3,3,3]
; X64-NEXT: movd %xmm7, %ecx
-; X64-NEXT: xorl %esi, %esi
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: seta %sil
-; X64-NEXT: movl $-1, %edx
-; X64-NEXT: cmovbl %edx, %esi
-; X64-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
-; X64-NEXT: movd %xmm7, %esi
+; X64-NEXT: movd %xmm7, %ecx
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
-; X64-NEXT: movd %xmm7, %r9d
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %esi, %r9d
-; X64-NEXT: seta %al
-; X64-NEXT: cmovbl %edx, %eax
+; X64-NEXT: movd %xmm7, %edi
+; X64-NEXT: cmpl %ecx, %edi
+; X64-NEXT: seta %cl
+; X64-NEXT: sbbb $0, %cl
+; X64-NEXT: movsbl %cl, %eax
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %edi, %r8d
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %edx, %esi
+; X64-NEXT: cmpl %edx, %esi
+; X64-NEXT: seta %dl
+; X64-NEXT: sbbb $0, %dl
+; X64-NEXT: movsbl %dl, %edx
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
-; X64-NEXT: movd %xmm5, %r8d
+; X64-NEXT: movd %xmm5, %esi
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,1,1]
-; X64-NEXT: movd %xmm5, %r9d
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: cmpl %r8d, %r9d
-; X64-NEXT: seta %dil
-; X64-NEXT: cmovbl %edx, %edi
+; X64-NEXT: movd %xmm5, %edi
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: seta %sil
+; X64-NEXT: sbbb $0, %sil
+; X64-NEXT: movsbl %sil, %esi
; X64-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm3[3,3,3,3]
-; X64-NEXT: movd %xmm5, %r9d
+; X64-NEXT: movd %xmm5, %edi
; X64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm4[3,3,3,3]
-; X64-NEXT: movd %xmm5, %r10d
-; X64-NEXT: xorl %r8d, %r8d
-; X64-NEXT: cmpl %r9d, %r10d
-; X64-NEXT: seta %r8b
-; X64-NEXT: cmovbl %edx, %r8d
+; X64-NEXT: movd %xmm5, %r8d
+; X64-NEXT: cmpl %edi, %r8d
+; X64-NEXT: seta %dil
+; X64-NEXT: sbbb $0, %dil
+; X64-NEXT: movsbl %dil, %edi
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,2,3]
-; X64-NEXT: movd %xmm5, %r10d
+; X64-NEXT: movd %xmm5, %r8d
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
-; X64-NEXT: movd %xmm5, %ebp
-; X64-NEXT: xorl %r9d, %r9d
-; X64-NEXT: cmpl %r10d, %ebp
+; X64-NEXT: movd %xmm5, %r11d
+; X64-NEXT: cmpl %r8d, %r11d
+; X64-NEXT: seta %r8b
+; X64-NEXT: sbbb $0, %r8b
+; X64-NEXT: movsbl %r8b, %r8d
+; X64-NEXT: cmpl %r9d, %r10d
; X64-NEXT: seta %r9b
-; X64-NEXT: cmovbl %edx, %r9d
-; X64-NEXT: xorl %r10d, %r10d
-; X64-NEXT: cmpl %r11d, %ebx
-; X64-NEXT: seta %r10b
-; X64-NEXT: cmovbl %edx, %r10d
+; X64-NEXT: sbbb $0, %r9b
+; X64-NEXT: movsbl %r9b, %r9d
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,1,1]
-; X64-NEXT: movd %xmm3, %ebx
+; X64-NEXT: movd %xmm3, %r10d
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,1,1]
-; X64-NEXT: movd %xmm3, %ebp
-; X64-NEXT: xorl %r11d, %r11d
-; X64-NEXT: cmpl %ebx, %ebp
-; X64-NEXT: seta %r11b
-; X64-NEXT: cmovbl %edx, %r11d
+; X64-NEXT: movd %xmm3, %r11d
+; X64-NEXT: cmpl %r10d, %r11d
+; X64-NEXT: seta %r10b
+; X64-NEXT: sbbb $0, %r10b
+; X64-NEXT: movsbl %r10b, %r10d
; X64-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; X64-NEXT: pextrw $0, %xmm1, %r15d
+; X64-NEXT: pextrw $0, %xmm1, %ebx
; X64-NEXT: movdqa %xmm1, %xmm4
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm3[3,3,3,3]
-; X64-NEXT: movd %xmm5, %ebp
+; X64-NEXT: pextrw $4, %xmm1, %r11d
+; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3]
+; X64-NEXT: movd %xmm3, %r14d
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
-; X64-NEXT: pextrw $0, %xmm0, %r12d
+; X64-NEXT: pextrw $0, %xmm0, %r15d
; X64-NEXT: movdqa %xmm0, %xmm5
-; X64-NEXT: movdqa %xmm0, %xmm6
-; X64-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3]
-; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[3,3,3,3]
-; X64-NEXT: movd %xmm7, %r14d
-; X64-NEXT: xorl %ebx, %ebx
-; X64-NEXT: cmpl %ebp, %r14d
-; X64-NEXT: seta %bl
-; X64-NEXT: cmovbl %edx, %ebx
-; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,2,3]
-; X64-NEXT: movd %xmm7, %r14d
-; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
-; X64-NEXT: movd %xmm7, %r13d
-; X64-NEXT: xorl %ebp, %ebp
-; X64-NEXT: cmpl %r14d, %r13d
-; X64-NEXT: seta %bpl
-; X64-NEXT: cmovbl %edx, %ebp
-; X64-NEXT: xorl %r14d, %r14d
-; X64-NEXT: cmpl %r15d, %r12d
+; X64-NEXT: pextrw $4, %xmm0, %ebp
+; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; X64-NEXT: movd %xmm3, %r12d
+; X64-NEXT: cmpl %r14d, %r12d
; X64-NEXT: seta %r14b
-; X64-NEXT: cmovbl %edx, %r14d
-; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,1,1]
+; X64-NEXT: sbbb $0, %r14b
+; X64-NEXT: movsbl %r14b, %r14d
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movd %xmm3, %r12d
-; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,1,1]
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; X64-NEXT: movd %xmm3, %r13d
-; X64-NEXT: xorl %r15d, %r15d
; X64-NEXT: cmpl %r12d, %r13d
+; X64-NEXT: seta %r12b
+; X64-NEXT: sbbb $0, %r12b
+; X64-NEXT: cmpl %ebx, %r15d
+; X64-NEXT: seta %bl
+; X64-NEXT: sbbb $0, %bl
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
+; X64-NEXT: movd %xmm1, %r15d
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X64-NEXT: movd %xmm0, %r13d
+; X64-NEXT: cmpl %r15d, %r13d
; X64-NEXT: seta %r15b
-; X64-NEXT: cmovbl %edx, %r15d
+; X64-NEXT: sbbb $0, %r15b
; X64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm4[3,3,3,3]
-; X64-NEXT: movd %xmm3, %r13d
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm4[3,3,3,3]
+; X64-NEXT: movd %xmm0, %r13d
; X64-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm5[3,3,3,3]
-; X64-NEXT: movd %xmm2, %eax
-; X64-NEXT: xorl %r12d, %r12d
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,3,3,3]
+; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: cmpl %r13d, %eax
-; X64-NEXT: seta %r12b
-; X64-NEXT: cmovbl %edx, %r12d
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,2,3]
-; X64-NEXT: movd %xmm2, %ecx
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,2,3]
-; X64-NEXT: movd %xmm2, %eax
-; X64-NEXT: xorl %r13d, %r13d
-; X64-NEXT: cmpl %ecx, %eax
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
+; X64-NEXT: movd %xmm0, %r13d
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3]
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: cmpl %r13d, %ecx
+; X64-NEXT: movsbl %r12b, %ecx
+; X64-NEXT: movsbl %bl, %ebx
+; X64-NEXT: movsbl %r15b, %r15d
; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Folded Reload
; X64-NEXT: # xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: pextrw $4, %xmm1, %eax
; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 4-byte Folded Reload
; X64-NEXT: # xmm3 = mem[0],zero,zero,zero
-; X64-NEXT: pextrw $4, %xmm0, %ecx
-; X64-NEXT: movd %esi, %xmm0
-; X64-NEXT: movd %edi, %xmm6
-; X64-NEXT: movd %r8d, %xmm7
-; X64-NEXT: movd %r9d, %xmm8
-; X64-NEXT: movd %r10d, %xmm1
-; X64-NEXT: movd %r11d, %xmm9
+; X64-NEXT: movd %edx, %xmm0
+; X64-NEXT: movd %esi, %xmm6
+; X64-NEXT: movd %edi, %xmm7
+; X64-NEXT: movd %r8d, %xmm8
+; X64-NEXT: movd %r9d, %xmm1
+; X64-NEXT: movd %r10d, %xmm9
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; X64-NEXT: movd %ebx, %xmm10
+; X64-NEXT: movd %r14d, %xmm10
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
-; X64-NEXT: movd %ebp, %xmm6
+; X64-NEXT: movd %ecx, %xmm6
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
-; X64-NEXT: movd %r14d, %xmm2
+; X64-NEXT: movd %ebx, %xmm2
; X64-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
; X64-NEXT: movd %r15d, %xmm3
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm9[0],xmm1[1],xmm9[1]
-; X64-NEXT: movd %r12d, %xmm7
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm8[0]
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm7
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm8
; X64-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm10[0],xmm6[1],xmm10[1]
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0]
-; X64-NEXT: seta %r13b
-; X64-NEXT: cmovbl %edx, %r13d
-; X64-NEXT: movd %r13d, %xmm6
-; X64-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %edx, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
+; X64-NEXT: cmpl %r11d, %ebp
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,1,1]
; X64-NEXT: movd %xmm4, %eax
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,1,1]
; X64-NEXT: movd %xmm4, %ecx
-; X64-NEXT: xorl %esi, %esi
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %edx, %esi
-; X64-NEXT: movd %esi, %xmm4
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movsbl %al, %eax
+; X64-NEXT: movd %eax, %xmm4
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
-; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm6[0]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm8[0]
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
@@ -984,202 +831,115 @@ define <16 x i32> @ucmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $48, %esp
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: subl $12, %esp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: seta %cl
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bh
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %bl
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dh
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ah
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
-; X86-NEXT: seta %dl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jb .LBB16_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %dl, %cl
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: .LBB16_2:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: seta %bl
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jb .LBB16_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: .LBB16_4:
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, (%esp) # 1-byte Spill
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
+; X86-NEXT: seta %bh
+; X86-NEXT: sbbb $0, %bh
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: seta %cl
-; X86-NEXT: movl $-1, %ebx
-; X86-NEXT: jb .LBB16_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: .LBB16_6:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jb .LBB16_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: .LBB16_8:
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: seta %cl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jb .LBB16_10
-; X86-NEXT: # %bb.9:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_10:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jb .LBB16_12
-; X86-NEXT: # %bb.11:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_12:
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %edi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: seta %cl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jb .LBB16_14
-; X86-NEXT: # %bb.13:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_14:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jb .LBB16_16
-; X86-NEXT: # %bb.15:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_16:
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %ebp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: seta %cl
-; X86-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jb .LBB16_18
-; X86-NEXT: # %bb.17:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_18:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, (%esp) # 4-byte Folded Spill
-; X86-NEXT: jb .LBB16_20
-; X86-NEXT: # %bb.19:
-; X86-NEXT: movb %al, %dl
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT: .LBB16_20:
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: seta %cl
-; X86-NEXT: movl $-1, %ebx
-; X86-NEXT: jb .LBB16_22
-; X86-NEXT: # %bb.21:
-; X86-NEXT: movb %cl, %dl
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: .LBB16_22:
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, %ebp
-; X86-NEXT: jb .LBB16_24
-; X86-NEXT: # %bb.23:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: .LBB16_24:
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %dl
-; X86-NEXT: seta %ah
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: jb .LBB16_26
-; X86-NEXT: # %bb.25:
-; X86-NEXT: movb %ah, %bl
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: .LBB16_26:
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: seta %al
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jb .LBB16_28
-; X86-NEXT: # %bb.27:
-; X86-NEXT: movb %al, %bl
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: .LBB16_28:
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movsbl %al, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: seta %cl
-; X86-NEXT: movl $-1, %edi
-; X86-NEXT: jb .LBB16_30
-; X86-NEXT: # %bb.29:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: .LBB16_30:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: seta %cl
-; X86-NEXT: jb .LBB16_32
-; X86-NEXT: # %bb.31:
-; X86-NEXT: movb %cl, %bl
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: .LBB16_32:
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 60(%eax)
-; X86-NEXT: movl %edi, 56(%eax)
+; X86-NEXT: movl %edx, 56(%eax)
; X86-NEXT: movl %esi, 52(%eax)
-; X86-NEXT: movl %edx, 48(%eax)
-; X86-NEXT: movl %ebp, 44(%eax)
+; X86-NEXT: movl %ebp, 48(%eax)
+; X86-NEXT: movl %edi, 44(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 40(%eax)
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X86-NEXT: movsbl %bh, %ecx
; X86-NEXT: movl %ecx, 36(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 32(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 28(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 24(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 20(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 16(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: movsbl (%esp), %edx # 1-byte Folded Reload
+; X86-NEXT: movl %edx, 32(%eax)
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; X86-NEXT: movsbl %bl, %edi
+; X86-NEXT: movl %edi, 28(%eax)
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X86-NEXT: movl %ebx, 24(%eax)
+; X86-NEXT: movl %edi, 20(%eax)
+; X86-NEXT: movl %edx, 16(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: addl $48, %esp
+; X86-NEXT: addl $12, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1196,150 +956,149 @@ define <16 x i8> @ucmp_wide_vec_op(<16 x i32> %x, <16 x i32> %y) nounwind {
; X64-NEXT: movd %xmm8, %eax
; X64-NEXT: pshufd {{.*#+}} xmm8 = xmm3[3,3,3,3]
; X64-NEXT: movd %xmm8, %ecx
-; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %eax, %ecx
-; X64-NEXT: seta %dl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovbl %eax, %edx
-; X64-NEXT: movd %edx, %xmm8
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm8
; X64-NEXT: pshufd {{.*#+}} xmm9 = xmm7[2,3,2,3]
-; X64-NEXT: movd %xmm9, %ecx
+; X64-NEXT: movd %xmm9, %eax
; X64-NEXT: pshufd {{.*#+}} xmm9 = xmm3[2,3,2,3]
-; X64-NEXT: movd %xmm9, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm9
+; X64-NEXT: movd %xmm9, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm9
; X64-NEXT: punpcklbw {{.*#+}} xmm9 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3],xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7]
-; X64-NEXT: movd %xmm7, %ecx
-; X64-NEXT: movd %xmm3, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm8
+; X64-NEXT: movd %xmm7, %eax
+; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm8
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,1,1]
-; X64-NEXT: movd %xmm7, %ecx
+; X64-NEXT: movd %xmm7, %eax
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,1,1]
-; X64-NEXT: movd %xmm3, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm3[0],xmm8[1],xmm3[1],xmm8[2],xmm3[2],xmm8[3],xmm3[3],xmm8[4],xmm3[4],xmm8[5],xmm3[5],xmm8[6],xmm3[6],xmm8[7],xmm3[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm6[3,3,3,3]
-; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: movd %xmm3, %eax
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,3,3,3]
-; X64-NEXT: movd %xmm3, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %xmm3, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,2,3]
-; X64-NEXT: movd %xmm7, %ecx
+; X64-NEXT: movd %xmm7, %eax
; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,2,3]
-; X64-NEXT: movd %xmm7, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm7
+; X64-NEXT: movd %xmm7, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm7
; X64-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
-; X64-NEXT: movd %xmm6, %ecx
-; X64-NEXT: movd %xmm2, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %xmm6, %eax
+; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,1,1]
-; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: movd %xmm6, %eax
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
-; X64-NEXT: movd %xmm2, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3]
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1]
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm5[3,3,3,3]
-; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: movd %xmm2, %eax
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
-; X64-NEXT: movd %xmm2, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm5[2,3,2,3]
-; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: movd %xmm6, %eax
; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
-; X64-NEXT: movd %xmm6, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm6
+; X64-NEXT: movd %xmm6, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm6
; X64-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
-; X64-NEXT: movd %xmm5, %ecx
-; X64-NEXT: movd %xmm1, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %xmm5, %eax
+; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,1,1]
-; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: movd %xmm5, %eax
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; X64-NEXT: movd %xmm1, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm4[3,3,3,3]
-; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: movd %xmm1, %eax
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
-; X64-NEXT: movd %xmm1, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
-; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: movd %xmm5, %eax
; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
-; X64-NEXT: movd %xmm5, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm5
+; X64-NEXT: movd %xmm5, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm5
; X64-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
-; X64-NEXT: movd %xmm4, %ecx
-; X64-NEXT: movd %xmm0, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %xmm4, %eax
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,1,1]
-; X64-NEXT: movd %xmm4, %ecx
+; X64-NEXT: movd %xmm4, %eax
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X64-NEXT: movd %xmm0, %edx
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: seta %sil
-; X64-NEXT: cmovbl %eax, %esi
-; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: cmpl %eax, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbb $0, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
@@ -1349,155 +1108,91 @@ define <16 x i8> @ucmp_wide_vec_op(<16 x i32> %x, <16 x i32> %y) nounwind {
;
; X86-LABEL: ucmp_wide_vec_op:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $12, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: movb $-1, %dl
-; X86-NEXT: jb .LBB17_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: .LBB17_2:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: seta %al
-; X86-NEXT: movb $-1, %ah
-; X86-NEXT: jb .LBB17_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movb %al, %ah
-; X86-NEXT: .LBB17_4:
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %ch
-; X86-NEXT: jb .LBB17_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movb %cl, %ch
-; X86-NEXT: .LBB17_6:
-; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: seta %al
-; X86-NEXT: movb $-1, %ah
-; X86-NEXT: jb .LBB17_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movb %al, %ah
-; X86-NEXT: .LBB17_8:
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %ch
-; X86-NEXT: jb .LBB17_10
-; X86-NEXT: # %bb.9:
-; X86-NEXT: movb %cl, %ch
-; X86-NEXT: .LBB17_10:
-; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: seta %al
-; X86-NEXT: movb $-1, %ah
-; X86-NEXT: jb .LBB17_12
-; X86-NEXT: # %bb.11:
-; X86-NEXT: movb %al, %ah
-; X86-NEXT: .LBB17_12:
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %ch
-; X86-NEXT: jb .LBB17_14
-; X86-NEXT: # %bb.13:
-; X86-NEXT: movb %cl, %ch
-; X86-NEXT: .LBB17_14:
-; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: seta %al
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: seta %al
-; X86-NEXT: movb $-1, %ah
-; X86-NEXT: jb .LBB17_16
-; X86-NEXT: # %bb.15:
-; X86-NEXT: movb %al, %ah
-; X86-NEXT: .LBB17_16:
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbb $0, %al
+; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %ch
-; X86-NEXT: jb .LBB17_18
-; X86-NEXT: # %bb.17:
-; X86-NEXT: movb %cl, %ch
-; X86-NEXT: .LBB17_18:
-; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: seta %bh
+; X86-NEXT: sbbb $0, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: seta %al
-; X86-NEXT: movb $-1, %ah
-; X86-NEXT: jb .LBB17_20
-; X86-NEXT: # %bb.19:
-; X86-NEXT: movb %al, %ah
-; X86-NEXT: .LBB17_20:
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: seta %cl
-; X86-NEXT: movb $-1, %bh
-; X86-NEXT: jb .LBB17_22
-; X86-NEXT: # %bb.21:
-; X86-NEXT: movb %cl, %bh
-; X86-NEXT: .LBB17_22:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: seta %bl
+; X86-NEXT: sbbb $0, %bl
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: seta %al
-; X86-NEXT: movb $-1, %cl
-; X86-NEXT: jb .LBB17_24
-; X86-NEXT: # %bb.23:
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: .LBB17_24:
+; X86-NEXT: seta %dh
+; X86-NEXT: sbbb $0, %dh
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: seta %ch
-; X86-NEXT: movb $-1, %dl
-; X86-NEXT: jb .LBB17_26
-; X86-NEXT: # %bb.25:
-; X86-NEXT: movb %ch, %dl
-; X86-NEXT: .LBB17_26:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sbbb $0, %ch
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: seta %al
-; X86-NEXT: movb $-1, %ch
-; X86-NEXT: jb .LBB17_28
-; X86-NEXT: # %bb.27:
-; X86-NEXT: movb %al, %ch
-; X86-NEXT: .LBB17_28:
+; X86-NEXT: seta %dl
+; X86-NEXT: sbbb $0, %dl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: seta %bl
-; X86-NEXT: movb $-1, %dh
-; X86-NEXT: jb .LBB17_30
-; X86-NEXT: # %bb.29:
-; X86-NEXT: movb %bl, %dh
-; X86-NEXT: .LBB17_30:
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: seta %bl
-; X86-NEXT: jb .LBB17_32
-; X86-NEXT: # %bb.31:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB17_32:
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Reload
-; X86-NEXT: movb %bl, 15(%eax)
-; X86-NEXT: movb %dh, 14(%eax)
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: seta %cl
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb %cl, 15(%eax)
+; X86-NEXT: movb %dl, 14(%eax)
; X86-NEXT: movb %ch, 13(%eax)
-; X86-NEXT: movb %dl, 12(%eax)
-; X86-NEXT: movb %cl, 11(%eax)
+; X86-NEXT: movb %dh, 12(%eax)
+; X86-NEXT: movb %bl, 11(%eax)
; X86-NEXT: movb %bh, 10(%eax)
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: movb %cl, 9(%eax)
@@ -1523,6 +1218,7 @@ define <16 x i8> @ucmp_wide_vec_op(<16 x i32> %x, <16 x i32> %y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%1 = call <16 x i8> @llvm.ucmp(<16 x i32> %x, <16 x i32> %y)
ret <16 x i8> %1
@@ -1600,70 +1296,66 @@ define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind {
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: andl $127, %eax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13
-; X64-NEXT: andl $127, %r13d
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; X64-NEXT: andl $127, %r12d
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: andl $127, %eax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15
-; X64-NEXT: andl $127, %r15d
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14
+; X64-NEXT: andl $127, %r14d
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: andl $127, %eax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: andl $127, %ebx
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12
-; X64-NEXT: andl $127, %r12d
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; X64-NEXT: andl $127, %r15d
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; X64-NEXT: andl $127, %ebp
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: andl $127, %r11d
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8
-; X64-NEXT: andl $127, %r8d
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; X64-NEXT: andl $127, %r13d
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: andl $127, %r10d
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT: andl $127, %edx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT: andl $127, %esi
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14
-; X64-NEXT: andl $127, %r14d
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: andl $127, %ecx
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
-; X64-NEXT: cmpq %r9, %rdi
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: sbbq %r14, %rax
-; X64-NEXT: setb %al
-; X64-NEXT: cmpq %rdi, %r9
-; X64-NEXT: sbbq %rcx, %r14
-; X64-NEXT: movzbl %al, %eax
-; X64-NEXT: movl $255, %r14d
-; X64-NEXT: cmovbl %r14d, %eax
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: andl $127, %edi
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: andl $127, %eax
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT: andl $127, %edx
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; X64-NEXT: cmpq %r9, %r8
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: sbbq %rax, %rcx
+; X64-NEXT: setb %cl
+; X64-NEXT: cmpq %r8, %r9
+; X64-NEXT: sbbq %rdx, %rax
+; X64-NEXT: sbbb $0, %cl
+; X64-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: cmpq %rax, %rcx
-; X64-NEXT: movq %rsi, %rdi
-; X64-NEXT: sbbq %rdx, %rdi
-; X64-NEXT: setb %dil
-; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: movq %rdi, %rdx
; X64-NEXT: sbbq %rsi, %rdx
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: cmovbl %r14d, %eax
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: setb %dl
+; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: sbbq %rdi, %rsi
+; X64-NEXT: sbbb $0, %dl
+; X64-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: cmpq %rax, %rcx
; X64-NEXT: movq %r10, %rdx
-; X64-NEXT: sbbq %r8, %rdx
+; X64-NEXT: sbbq %r13, %rdx
; X64-NEXT: setb %dl
; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbq %r10, %r8
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: cmovbl %r14d, %eax
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: sbbq %r10, %r13
+; X64-NEXT: sbbb $0, %dl
+; X64-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: cmpq %rax, %rcx
@@ -1672,184 +1364,179 @@ define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind {
; X64-NEXT: setb %dl
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: sbbq %r11, %rbp
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: cmovbl %r14d, %eax
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: sbbb $0, %dl
+; X64-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: cmpq %rax, %rcx
-; X64-NEXT: movq %r12, %rdx
+; X64-NEXT: movq %r15, %rdx
; X64-NEXT: sbbq %rbx, %rdx
; X64-NEXT: setb %dl
; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbq %r12, %rbx
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: cmovbl %r14d, %eax
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: cmpq %rax, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; X64-NEXT: movq %rsi, %rdx
-; X64-NEXT: sbbq %r15, %rdx
-; X64-NEXT: setb %dl
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbq %rsi, %r15
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: cmovbl %r14d, %eax
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: sbbq %r15, %rbx
+; X64-NEXT: sbbb $0, %dl
+; X64-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: cmpq %rax, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; X64-NEXT: movq %rsi, %rdx
-; X64-NEXT: sbbq %r13, %rdx
+; X64-NEXT: sbbq %r14, %rdx
; X64-NEXT: setb %dl
; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: sbbq %rsi, %r13
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: cmovbl %r14d, %eax
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: sbbq %rsi, %r14
+; X64-NEXT: sbbb $0, %dl
+; X64-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT: cmpq %rcx, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
-; X64-NEXT: movq %rdi, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %rsi
-; X64-NEXT: setb %sil
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: sbbq %r12, %rax
+; X64-NEXT: setb %r13b
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: sbbq %rdi, %rax
-; X64-NEXT: movzbl %sil, %ebp
-; X64-NEXT: cmovbl %r14d, %ebp
+; X64-NEXT: sbbq %rsi, %r12
+; X64-NEXT: sbbb $0, %r13b
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT: cmpq %rdx, %rsi
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %rdi
-; X64-NEXT: setb %dil
+; X64-NEXT: sbbq %rax, %rcx
+; X64-NEXT: setb %bpl
; X64-NEXT: cmpq %rsi, %rdx
-; X64-NEXT: sbbq %rcx, %rax
-; X64-NEXT: movzbl %dil, %ebx
-; X64-NEXT: cmovbl %r14d, %ebx
+; X64-NEXT: sbbq %rdi, %rax
+; X64-NEXT: sbbb $0, %bpl
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: cmpq %rsi, %rdi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %r8
+; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %r8
-; X64-NEXT: setb %r8b
+; X64-NEXT: sbbq %rax, %rdx
+; X64-NEXT: setb %r11b
; X64-NEXT: cmpq %rdi, %rsi
; X64-NEXT: sbbq %rcx, %rax
-; X64-NEXT: movzbl %r8b, %r10d
-; X64-NEXT: cmovbl %r14d, %r10d
+; X64-NEXT: sbbb $0, %r11b
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8
; X64-NEXT: cmpq %rdi, %r8
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %r9
+; X64-NEXT: movq %rcx, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %r9
-; X64-NEXT: setb %r9b
+; X64-NEXT: sbbq %rax, %rsi
+; X64-NEXT: setb %sil
; X64-NEXT: cmpq %r8, %rdi
; X64-NEXT: sbbq %rcx, %rax
-; X64-NEXT: movzbl %r9b, %r8d
-; X64-NEXT: cmovbl %r14d, %r8d
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: sbbb $0, %sil
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9
+; X64-NEXT: cmpq %r8, %r9
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT: movq %rcx, %rdi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT: sbbq %rax, %rdi
+; X64-NEXT: setb %dil
+; X64-NEXT: cmpq %r9, %r8
+; X64-NEXT: sbbq %rcx, %rax
+; X64-NEXT: sbbb $0, %dil
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9
-; X64-NEXT: cmpq %rdi, %r9
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT: cmpq %r9, %r10
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %r11
+; X64-NEXT: movq %rcx, %r8
; X64-NEXT: movq (%rsp), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %r11
-; X64-NEXT: setb %r11b
-; X64-NEXT: cmpq %r9, %rdi
+; X64-NEXT: sbbq %rax, %r8
+; X64-NEXT: setb %r8b
+; X64-NEXT: cmpq %r10, %r9
; X64-NEXT: sbbq %rcx, %rax
-; X64-NEXT: movzbl %r11b, %r9d
-; X64-NEXT: cmovbl %r14d, %r9d
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; X64-NEXT: cmpq %rdi, %r11
+; X64-NEXT: sbbb $0, %r8b
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; X64-NEXT: cmpq %r10, %rbx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %r15
+; X64-NEXT: movq %rcx, %r9
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %r15
-; X64-NEXT: setb %r15b
-; X64-NEXT: cmpq %r11, %rdi
+; X64-NEXT: sbbq %rax, %r9
+; X64-NEXT: setb %r9b
+; X64-NEXT: cmpq %rbx, %r10
; X64-NEXT: sbbq %rcx, %rax
-; X64-NEXT: movzbl %r15b, %r11d
-; X64-NEXT: cmovbl %r14d, %r11d
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: sbbb $0, %r9b
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: cmpq %rax, %rdi
+; X64-NEXT: cmpq %rax, %rbx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: sbbq %rcx, %r15
-; X64-NEXT: setb %r15b
-; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: sbbq %rcx, %r10
+; X64-NEXT: setb %r10b
+; X64-NEXT: cmpq %rbx, %rax
; X64-NEXT: sbbq %rdx, %rcx
-; X64-NEXT: movzbl %r15b, %edi
-; X64-NEXT: cmovbl %r14d, %edi
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; X64-NEXT: sbbb $0, %r10b
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: cmpq %rcx, %r15
+; X64-NEXT: cmpq %rcx, %r14
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %r12
-; X64-NEXT: setb %r12b
-; X64-NEXT: cmpq %r15, %rcx
+; X64-NEXT: sbbq %rax, %rbx
+; X64-NEXT: setb %bl
+; X64-NEXT: cmpq %r14, %rcx
; X64-NEXT: sbbq %rdx, %rax
-; X64-NEXT: movzbl %r12b, %r15d
-; X64-NEXT: cmovbl %r14d, %r15d
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; X64-NEXT: sbbb $0, %bl
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: cmpq %rcx, %r12
+; X64-NEXT: cmpq %rcx, %r15
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %rdx, %r13
+; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %r13
-; X64-NEXT: setb %r13b
-; X64-NEXT: cmpq %r12, %rcx
+; X64-NEXT: sbbq %rax, %r14
+; X64-NEXT: setb %r14b
+; X64-NEXT: cmpq %r15, %rcx
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12
; X64-NEXT: sbbq %rdx, %rax
-; X64-NEXT: movzbl %r13b, %r12d
-; X64-NEXT: cmovbl %r14d, %r12d
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT: cmpq %rsi, %rdx
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: sbbb $0, %r14b
+; X64-NEXT: cmpq %r12, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %rdx, %r15
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
-; X64-NEXT: movq %rcx, %r13
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; X64-NEXT: sbbq %rax, %r13
-; X64-NEXT: setb %r13b
-; X64-NEXT: cmpq %rdx, %rsi
-; X64-NEXT: sbbq %rcx, %rax
-; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Folded Reload
-; X64-NEXT: # xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Folded Reload
-; X64-NEXT: # xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 4-byte Folded Reload
-; X64-NEXT: # xmm3 = mem[0],zero,zero,zero
-; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Folded Reload
-; X64-NEXT: # xmm4 = mem[0],zero,zero,zero
-; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 4-byte Folded Reload
-; X64-NEXT: # xmm5 = mem[0],zero,zero,zero
-; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 4-byte Folded Reload
-; X64-NEXT: # xmm6 = mem[0],zero,zero,zero
-; X64-NEXT: movd %ebp, %xmm7
-; X64-NEXT: movd %ebx, %xmm8
-; X64-NEXT: movd %r10d, %xmm9
-; X64-NEXT: movd %r8d, %xmm10
-; X64-NEXT: movd %r9d, %xmm11
-; X64-NEXT: movd %r11d, %xmm12
-; X64-NEXT: movd %edi, %xmm13
-; X64-NEXT: movd %r15d, %xmm14
+; X64-NEXT: sbbq %rcx, %r15
+; X64-NEXT: setb %r15b
+; X64-NEXT: cmpq %rax, %r12
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT: movd %eax, %xmm0
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT: movd %eax, %xmm1
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT: movd %eax, %xmm2
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT: movd %eax, %xmm3
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT: movd %eax, %xmm4
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; X64-NEXT: movd %eax, %xmm5
+; X64-NEXT: movzbl %r13b, %eax
+; X64-NEXT: movd %eax, %xmm6
+; X64-NEXT: movzbl %bpl, %eax
+; X64-NEXT: movd %eax, %xmm7
+; X64-NEXT: movzbl %r11b, %eax
+; X64-NEXT: movd %eax, %xmm8
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: movd %eax, %xmm9
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: movd %eax, %xmm10
+; X64-NEXT: movzbl %r8b, %eax
+; X64-NEXT: movd %eax, %xmm11
+; X64-NEXT: movzbl %r9b, %eax
+; X64-NEXT: movd %eax, %xmm12
+; X64-NEXT: movzbl %r10b, %eax
+; X64-NEXT: movd %eax, %xmm13
+; X64-NEXT: movzbl %bl, %eax
+; X64-NEXT: movd %eax, %xmm14
+; X64-NEXT: movzbl %r14b, %eax
+; X64-NEXT: movd %eax, %xmm15
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
@@ -1861,17 +1548,17 @@ define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind {
; X64-NEXT: punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7]
; X64-NEXT: punpcklwd {{.*#+}} xmm11 = xmm11[0],xmm9[0],xmm11[1],xmm9[1],xmm11[2],xmm9[2],xmm11[3],xmm9[3]
; X64-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm12[0],xmm13[1],xmm12[1],xmm13[2],xmm12[2],xmm13[3],xmm12[3],xmm13[4],xmm12[4],xmm13[5],xmm12[5],xmm13[6],xmm12[6],xmm13[7],xmm12[7]
-; X64-NEXT: movd %r12d, %xmm0
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm14[0],xmm0[1],xmm14[1],xmm0[2],xmm14[2],xmm0[3],xmm14[3],xmm0[4],xmm14[4],xmm0[5],xmm14[5],xmm0[6],xmm14[6],xmm0[7],xmm14[7]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm13[0],xmm0[1],xmm13[1],xmm0[2],xmm13[2],xmm0[3],xmm13[3]
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1]
-; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm7[0]
-; X64-NEXT: movzbl %r13b, %eax
-; X64-NEXT: cmovbl %r14d, %eax
+; X64-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm15 = xmm15[0],xmm13[0],xmm15[1],xmm13[1],xmm15[2],xmm13[2],xmm15[3],xmm13[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm15 = xmm15[0],xmm11[0],xmm15[1],xmm11[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm15 = xmm15[0],xmm7[0]
+; X64-NEXT: sbbq %rdx, %rcx
+; X64-NEXT: sbbb $0, %r15b
+; X64-NEXT: movzbl %r15b, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
; X64-NEXT: movb %al, 4(%rdi)
-; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movdqa %xmm15, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
@@ -1950,502 +1637,471 @@ define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andl $127, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: andl $127, %edi
+; X86-NEXT: subl $132, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $127, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: andl $127, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: cmpl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: movb $-1, %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: jb .LBB18_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X86-NEXT: .LBB18_2:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %edx
-; X86-NEXT: andl $127, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl %eax, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jb .LBB18_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB18_4:
; X86-NEXT: andl $127, %eax
-; X86-NEXT: andl $127, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %ebx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: sbbl %ebp, %edi
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl %ebx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jb .LBB18_6
-; X86-NEXT: # %bb.5:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB18_6:
-; X86-NEXT: andl $127, %ecx
-; X86-NEXT: andl $127, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl %edi, %edx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: andl $127, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $127, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl $127, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: andl $127, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %edi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl %ebx, %eax
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: movb $-1, %bl
-; X86-NEXT: jb .LBB18_8
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X86-NEXT: .LBB18_8:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %edx
-; X86-NEXT: andl $127, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %ebp, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jb .LBB18_10
-; X86-NEXT: # %bb.9:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB18_10:
-; X86-NEXT: andl $127, %eax
-; X86-NEXT: andl $127, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %esi, %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %ebp, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, %edi
; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl %esi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jb .LBB18_12
-; X86-NEXT: # %bb.11:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB18_12:
-; X86-NEXT: andl $127, %ecx
-; X86-NEXT: andl $127, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl %edi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl %ebp, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: sbbl %eax, %esi
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %edi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %ebx, %eax
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: movb $-1, %bl
-; X86-NEXT: jb .LBB18_14
-; X86-NEXT: # %bb.13:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X86-NEXT: .LBB18_14:
-; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %edx
-; X86-NEXT: andl $127, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: cmpl %ebp, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: sbbl %ebp, %edi
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
; X86-NEXT: setb %bl
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: movb $-1, %bh
-; X86-NEXT: jb .LBB18_16
-; X86-NEXT: # %bb.15:
-; X86-NEXT: movb %bl, %bh
-; X86-NEXT: .LBB18_16:
-; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %eax
-; X86-NEXT: andl $127, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %ebx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: setb %bl
; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: movb $-1, %dl
-; X86-NEXT: jb .LBB18_18
-; X86-NEXT: # %bb.17:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
-; X86-NEXT: .LBB18_18:
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %ecx
-; X86-NEXT: andl $127, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %ebx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: sbbl %ebp, %edi
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %ebx, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %ebp, %eax
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: sbbl %ebp, %edi
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $0, %ebp
-; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: movb $-1, %dh
-; X86-NEXT: jb .LBB18_20
-; X86-NEXT: # %bb.19:
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
-; X86-NEXT: .LBB18_20:
-; X86-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %edi
-; X86-NEXT: andl $127, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %ebp, %eax
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: movl %ebp, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: setb %cl
+; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %ebp, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: movl %ebp, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: sbbl %ecx, %edx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %eax, %ebp
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: setb %cl
+; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %ebp, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: movb $-1, %cl
-; X86-NEXT: jb .LBB18_22
-; X86-NEXT: # %bb.21:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: .LBB18_22:
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: movl %ebp, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %edi
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: setb %cl
+; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %ebp, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %eax
-; X86-NEXT: andl $127, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpl %ebp, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %ebx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: movb $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: jb .LBB18_24
-; X86-NEXT: # %bb.23:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: .LBB18_24:
-; X86-NEXT: andl $127, %ebp
-; X86-NEXT: andl $127, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %ecx, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: sbbl %eax, %esi
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: movl %ebp, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %edx, %edx
+; X86-NEXT: setb %dl
+; X86-NEXT: cmpl %ecx, %eax
+; X86-NEXT: sbbl %edi, %esi
+; X86-NEXT: sbbl %ebp, %ebx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: sbbb $0, %dl
+; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %ecx, %eax
+; X86-NEXT: sbbl %edi, %esi
+; X86-NEXT: sbbl %ebp, %edx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jb .LBB18_26
-; X86-NEXT: # %bb.25:
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X86-NEXT: .LBB18_26:
-; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %edi
-; X86-NEXT: andl $127, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: cmpl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: sbbl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %ecx, %eax
+; X86-NEXT: sbbl %edi, %esi
+; X86-NEXT: sbbl %ebp, %edx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: setb %al
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %ebx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %ebp
+; X86-NEXT: sbbl %esi, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: sbbl %edx, %ebp
; X86-NEXT: movl $0, %ebp
; X86-NEXT: sbbl %ebp, %ebp
-; X86-NEXT: movb $-1, %ah
-; X86-NEXT: jb .LBB18_28
-; X86-NEXT: # %bb.27:
-; X86-NEXT: movb %al, %ah
-; X86-NEXT: .LBB18_28:
-; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %edx
-; X86-NEXT: andl $127, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl %ebp, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: setb %cl
+; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %edi, %esi
+; X86-NEXT: sbbl %ebx, %edx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: setb %al
-; X86-NEXT: cmpl %ebx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: movb $-1, %cl
-; X86-NEXT: jb .LBB18_30
-; X86-NEXT: # %bb.29:
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: .LBB18_30:
+; X86-NEXT: sbbb $0, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: andl $127, %ebp
-; X86-NEXT: andl $127, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %ecx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: sbbl %edi, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: sbbl %edx, %eax
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: setb %al
-; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %ebp, %edi
; X86-NEXT: sbbl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl $0, %ecx
; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: sbbb $0, %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movb $-1, %cl
-; X86-NEXT: jb .LBB18_32
-; X86-NEXT: # %bb.31:
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: .LBB18_32:
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: sbbl %edi, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: sbbl %edx, %ebp
+; X86-NEXT: movl $0, %ebp
+; X86-NEXT: sbbl %ebp, %ebp
+; X86-NEXT: setb %bh
+; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: sbbb $0, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %esi, %ebp
+; X86-NEXT: sbbl %edi, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl $0, %ebp
+; X86-NEXT: sbbl %ebp, %ebp
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: sbbl %esi, %edi
; X86-NEXT: sbbl %edx, %eax
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: sbbl %ebp, %eax
-; X86-NEXT: movl $0, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: setb %al
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: sbbl %ebx, %ebp
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: movb $-1, %dl
-; X86-NEXT: jb .LBB18_34
-; X86-NEXT: # %bb.33:
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: .LBB18_34:
-; X86-NEXT: movzbl %dl, %eax
-; X86-NEXT: andl $3, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb %al, 4(%edx)
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X86-NEXT: sbbb $0, %cl
+; X86-NEXT: movzbl %cl, %ecx
+; X86-NEXT: andl $3, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movb %cl, 4(%edi)
+; X86-NEXT: movzbl %bh, %ebp
+; X86-NEXT: movzbl %bl, %ecx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; X86-NEXT: andl $3, %ebp
; X86-NEXT: andl $3, %ecx
-; X86-NEXT: andl $3, %edi
-; X86-NEXT: leal (%edi,%ecx,4), %eax
-; X86-NEXT: andl $3, %esi
-; X86-NEXT: shll $4, %esi
-; X86-NEXT: orl %eax, %esi
+; X86-NEXT: leal (%ecx,%ebp,4), %ecx
+; X86-NEXT: andl $3, %eax
+; X86-NEXT: shll $4, %eax
+; X86-NEXT: orl %ecx, %eax
; X86-NEXT: andl $3, %ebx
; X86-NEXT: shll $6, %ebx
-; X86-NEXT: orl %esi, %ebx
-; X86-NEXT: andl $3, %ebp
-; X86-NEXT: shll $8, %ebp
-; X86-NEXT: orl %ebx, %ebp
+; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: andl $3, %esi
+; X86-NEXT: shll $8, %esi
+; X86-NEXT: orl %ebx, %esi
; X86-NEXT: andl $3, %edx
; X86-NEXT: shll $10, %edx
-; X86-NEXT: orl %ebp, %edx
+; X86-NEXT: orl %esi, %edx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: andl $3, %eax
; X86-NEXT: shll $12, %eax
@@ -2457,37 +2113,37 @@ define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind {
; X86-NEXT: andl $3, %eax
; X86-NEXT: shll $16, %eax
; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
-; X86-NEXT: andl $3, %edi
-; X86-NEXT: shll $18, %edi
-; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; X86-NEXT: andl $3, %esi
+; X86-NEXT: shll $18, %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: andl $3, %eax
+; X86-NEXT: shll $20, %eax
+; X86-NEXT: orl %esi, %eax
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
-; X86-NEXT: andl $3, %ecx
-; X86-NEXT: shll $20, %ecx
-; X86-NEXT: orl %edi, %ecx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: andl $3, %ecx
+; X86-NEXT: shll $22, %ecx
; X86-NEXT: andl $3, %esi
-; X86-NEXT: shll $22, %esi
-; X86-NEXT: andl $3, %edi
-; X86-NEXT: shll $24, %edi
-; X86-NEXT: orl %esi, %edi
+; X86-NEXT: shll $24, %esi
+; X86-NEXT: orl %ecx, %esi
; X86-NEXT: andl $3, %ebx
; X86-NEXT: shll $26, %ebx
-; X86-NEXT: orl %edi, %ebx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
-; X86-NEXT: andl $3, %eax
-; X86-NEXT: shll $28, %eax
-; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: orl %esi, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-NEXT: andl $3, %ecx
+; X86-NEXT: shll $28, %ecx
+; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
; X86-NEXT: shll $30, %edx
-; X86-NEXT: orl %eax, %edx
; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $44, %esp
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, (%edi)
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: addl $132, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx