[llvm] a3b0c31 - Revert "[DAG] Enhance SDPatternMatch to match integer minimum and maximum patterns in addition to the existing ISD nodes." (#112200)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 06:43:52 PDT 2024
Author: c8ef
Date: 2024-10-14T21:43:49+08:00
New Revision: a3b0c31ebc2f6fe672f08f6b7d15f25a2b26edda
URL: https://github.com/llvm/llvm-project/commit/a3b0c31ebc2f6fe672f08f6b7d15f25a2b26edda
DIFF: https://github.com/llvm/llvm-project/commit/a3b0c31ebc2f6fe672f08f6b7d15f25a2b26edda.diff
LOG: Revert "[DAG] Enhance SDPatternMatch to match integer minimum and maximum patterns in addition to the existing ISD nodes." (#112200)
Reverts llvm/llvm-project#111774
This appears to be causing some tests to fail.
Added:
Modified:
llvm/include/llvm/CodeGen/SDPatternMatch.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/abds.ll
llvm/test/CodeGen/AArch64/abdu.ll
llvm/test/CodeGen/AArch64/midpoint-int.ll
llvm/test/CodeGen/RISCV/abds.ll
llvm/test/CodeGen/RISCV/abdu.ll
llvm/test/CodeGen/X86/abds.ll
llvm/test/CodeGen/X86/abdu.ll
llvm/test/CodeGen/X86/midpoint-int.ll
llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 0af4f73b869c3c..04135ee7e1c022 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -542,81 +542,6 @@ struct BinaryOpc_match {
}
};
-template <typename LHS_P, typename RHS_P, typename Pred_t,
- bool Commutable = false, bool ExcludeChain = false>
-struct MaxMin_match {
- using PredType = Pred_t;
- LHS_P LHS;
- RHS_P RHS;
-
- MaxMin_match(const LHS_P &L, const RHS_P &R) : LHS(L), RHS(R) {}
-
- template <typename MatchContext>
- bool match(const MatchContext &Ctx, SDValue N) {
- if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT)) ||
- sd_context_match(N, Ctx, m_Opc(ISD::VSELECT))) {
- EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
- assert(EO_SELECT.Size == 3);
- SDValue Cond = N->getOperand(EO_SELECT.FirstIndex);
- SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 1);
- SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 2);
-
- if (sd_context_match(Cond, Ctx, m_Opc(ISD::SETCC))) {
- EffectiveOperands<ExcludeChain> EO_SETCC(Cond, Ctx);
- assert(EO_SETCC.Size == 3);
- SDValue L = Cond->getOperand(EO_SETCC.FirstIndex);
- SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
- auto *CondNode =
- cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
-
- if ((TrueValue != L || FalseValue != R) &&
- (TrueValue != R || FalseValue != L)) {
- return false;
- }
-
- ISD::CondCode Cond =
- TrueValue == L ? CondNode->get()
- : getSetCCInverse(CondNode->get(), L.getValueType());
- if (!Pred_t::match(Cond)) {
- return false;
- }
- return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
- (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
- }
- }
-
- return false;
- }
-};
-
-// Helper class for identifying signed max predicates.
-struct smax_pred_ty {
- static bool match(ISD::CondCode Cond) {
- return Cond == ISD::CondCode::SETGT || Cond == ISD::CondCode::SETGE;
- }
-};
-
-// Helper class for identifying unsigned max predicates.
-struct umax_pred_ty {
- static bool match(ISD::CondCode Cond) {
- return Cond == ISD::CondCode::SETUGT || Cond == ISD::CondCode::SETUGE;
- }
-};
-
-// Helper class for identifying signed min predicates.
-struct smin_pred_ty {
- static bool match(ISD::CondCode Cond) {
- return Cond == ISD::CondCode::SETLT || Cond == ISD::CondCode::SETLE;
- }
-};
-
-// Helper class for identifying unsigned min predicates.
-struct umin_pred_ty {
- static bool match(ISD::CondCode Cond) {
- return Cond == ISD::CondCode::SETULT || Cond == ISD::CondCode::SETULE;
- }
-};
-
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS> m_BinOp(unsigned Opc, const LHS &L,
const RHS &R) {
@@ -688,45 +613,21 @@ inline BinaryOpc_match<LHS, RHS, true> m_SMin(const LHS &L, const RHS &R) {
return BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R);
}
-template <typename LHS, typename RHS>
-inline auto m_SMinLike(const LHS &L, const RHS &R) {
- return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R),
- MaxMin_match<LHS, RHS, smin_pred_ty, true>(L, R));
-}
-
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS, true> m_SMax(const LHS &L, const RHS &R) {
return BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R);
}
-template <typename LHS, typename RHS>
-inline auto m_SMaxLike(const LHS &L, const RHS &R) {
- return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R),
- MaxMin_match<LHS, RHS, smax_pred_ty, true>(L, R));
-}
-
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS, true> m_UMin(const LHS &L, const RHS &R) {
return BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R);
}
-template <typename LHS, typename RHS>
-inline auto m_UMinLike(const LHS &L, const RHS &R) {
- return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R),
- MaxMin_match<LHS, RHS, umin_pred_ty, true>(L, R));
-}
-
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS, true> m_UMax(const LHS &L, const RHS &R) {
return BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R);
}
-template <typename LHS, typename RHS>
-inline auto m_UMaxLike(const LHS &L, const RHS &R) {
- return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R),
- MaxMin_match<LHS, RHS, umax_pred_ty, true>(L, R));
-}
-
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS> m_UDiv(const LHS &L, const RHS &R) {
return BinaryOpc_match<LHS, RHS>(ISD::UDIV, L, R);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 924558a298fd85..810ca458bc8787 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4190,26 +4190,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// smax(a,b) - smin(a,b) --> abds(a,b)
if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
- sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
- sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
+ sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
return DAG.getNode(ISD::ABDS, DL, VT, A, B);
// smin(a,b) - smax(a,b) --> neg(abds(a,b))
if (hasOperation(ISD::ABDS, VT) &&
- sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
- sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
+ sd_match(N0, m_SMin(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_SMax(m_Specific(A), m_Specific(B))))
return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
// umax(a,b) - umin(a,b) --> abdu(a,b)
if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
- sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
- sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
+ sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
return DAG.getNode(ISD::ABDU, DL, VT, A, B);
// umin(a,b) - umax(a,b) --> neg(abdu(a,b))
if (hasOperation(ISD::ABDU, VT) &&
- sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
- sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
+ sd_match(N0, m_UMin(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_UMax(m_Specific(A), m_Specific(B))))
return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index 62db30f17747cf..e5cc04f9be1a1f 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -547,9 +547,10 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: cmp w8, w1, sxtb
+; CHECK-NEXT: csel w8, w0, w1, lt
+; CHECK-NEXT: csel w9, w1, w0, lt
+; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
%cmp = icmp slt i8 %a, %b
%ab = select i1 %cmp, i8 %a, i8 %b
@@ -562,9 +563,10 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: sub w8, w8, w1, sxth
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: cmp w8, w1, sxth
+; CHECK-NEXT: csel w8, w0, w1, le
+; CHECK-NEXT: csel w9, w1, w0, le
+; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
%cmp = icmp sle i16 %a, %b
%ab = select i1 %cmp, i16 %a, i16 %b
@@ -576,9 +578,10 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, gt
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: csel w8, w0, w1, gt
+; CHECK-NEXT: csel w9, w1, w0, gt
+; CHECK-NEXT: sub w0, w8, w9
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -590,9 +593,10 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, gt
+; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: csel x8, x0, x1, ge
+; CHECK-NEXT: csel x9, x1, x0, ge
+; CHECK-NEXT: sub x0, x8, x9
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -604,13 +608,14 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbc x9, x1, x3
-; CHECK-NEXT: subs x10, x2, x0
-; CHECK-NEXT: sbc x11, x3, x1
-; CHECK-NEXT: sbcs xzr, x3, x1
-; CHECK-NEXT: csel x0, x8, x10, lt
-; CHECK-NEXT: csel x1, x9, x11, lt
+; CHECK-NEXT: cmp x0, x2
+; CHECK-NEXT: sbcs xzr, x1, x3
+; CHECK-NEXT: csel x8, x0, x2, lt
+; CHECK-NEXT: csel x9, x2, x0, lt
+; CHECK-NEXT: csel x10, x1, x3, lt
+; CHECK-NEXT: csel x11, x3, x1, lt
+; CHECK-NEXT: subs x0, x9, x8
+; CHECK-NEXT: sbc x1, x11, x10
; CHECK-NEXT: ret
%cmp = icmp slt i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 4585de96c848f2..0a44ae16884582 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -408,9 +408,10 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1, uxtb
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: cmp w8, w1, uxtb
+; CHECK-NEXT: csel w8, w0, w1, lo
+; CHECK-NEXT: csel w9, w1, w0, lo
+; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
%cmp = icmp ult i8 %a, %b
%ab = select i1 %cmp, i8 %a, i8 %b
@@ -423,9 +424,10 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1, uxth
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cneg w0, w8, mi
+; CHECK-NEXT: cmp w8, w1, uxth
+; CHECK-NEXT: csel w8, w0, w1, ls
+; CHECK-NEXT: csel w9, w1, w0, ls
+; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
%cmp = icmp ule i16 %a, %b
%ab = select i1 %cmp, i16 %a, i16 %b
@@ -437,9 +439,10 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: subs w9, w0, w1
-; CHECK-NEXT: csel w0, w9, w8, hi
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: csel w8, w0, w1, hi
+; CHECK-NEXT: csel w9, w1, w0, hi
+; CHECK-NEXT: sub w0, w8, w9
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -451,9 +454,10 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x8, x1, x0
-; CHECK-NEXT: subs x9, x0, x1
-; CHECK-NEXT: csel x0, x9, x8, hi
+; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: csel x8, x0, x1, hs
+; CHECK-NEXT: csel x9, x1, x0, hs
+; CHECK-NEXT: sub x0, x8, x9
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -465,14 +469,14 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x0, x2
-; CHECK-NEXT: sbcs x9, x1, x3
-; CHECK-NEXT: cset w10, lo
-; CHECK-NEXT: sbfx x10, x10, #0, #1
-; CHECK-NEXT: eor x8, x8, x10
-; CHECK-NEXT: eor x9, x9, x10
-; CHECK-NEXT: subs x0, x8, x10
-; CHECK-NEXT: sbc x1, x9, x10
+; CHECK-NEXT: cmp x0, x2
+; CHECK-NEXT: sbcs xzr, x1, x3
+; CHECK-NEXT: csel x8, x0, x2, lo
+; CHECK-NEXT: csel x9, x2, x0, lo
+; CHECK-NEXT: csel x10, x1, x3, lo
+; CHECK-NEXT: csel x11, x3, x1, lo
+; CHECK-NEXT: subs x0, x9, x8
+; CHECK-NEXT: sbc x1, x11, x10
; CHECK-NEXT: ret
%cmp = icmp ult i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index bbdce7c6e933b3..1043fa5c4565ee 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -13,11 +13,12 @@
define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w9, w1, w0
-; CHECK-NEXT: subs w10, w0, w1
+; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w10, w9, gt
+; CHECK-NEXT: csel w9, w1, w0, gt
+; CHECK-NEXT: csel w10, w0, w1, gt
; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w9, w10, w9
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -35,11 +36,12 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w9, w1, w0
-; CHECK-NEXT: subs w10, w0, w1
+; CHECK-NEXT: cmp w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w10, w9, hi
+; CHECK-NEXT: csel w9, w1, w0, hi
+; CHECK-NEXT: csel w10, w0, w1, hi
; CHECK-NEXT: cneg w8, w8, ls
+; CHECK-NEXT: sub w9, w10, w9
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -62,10 +64,10 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w9, w1
-; CHECK-NEXT: sub w10, w1, w9
+; CHECK-NEXT: csel w10, w1, w9, gt
+; CHECK-NEXT: csel w11, w9, w1, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: subs w11, w9, w1
-; CHECK-NEXT: csel w10, w11, w10, gt
+; CHECK-NEXT: sub w10, w11, w10
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -87,10 +89,10 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: sub w10, w9, w0
+; CHECK-NEXT: csel w10, w9, w0, gt
+; CHECK-NEXT: csel w9, w0, w9, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: subs w9, w0, w9
-; CHECK-NEXT: csel w9, w9, w10, gt
+; CHECK-NEXT: sub w9, w9, w10
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -113,10 +115,10 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: sub w11, w10, w9
+; CHECK-NEXT: csel w11, w10, w9, gt
+; CHECK-NEXT: csel w10, w9, w10, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: subs w10, w9, w10
-; CHECK-NEXT: csel w10, w10, w11, gt
+; CHECK-NEXT: sub w10, w10, w11
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -142,11 +144,12 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x9, x1, x0
-; CHECK-NEXT: subs x10, x0, x1
+; CHECK-NEXT: cmp x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x10, x9, gt
+; CHECK-NEXT: csel x9, x1, x0, gt
+; CHECK-NEXT: csel x10, x0, x1, gt
; CHECK-NEXT: cneg x8, x8, le
+; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -164,11 +167,12 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub x9, x1, x0
-; CHECK-NEXT: subs x10, x0, x1
+; CHECK-NEXT: cmp x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x10, x9, hi
+; CHECK-NEXT: csel x9, x1, x0, hi
+; CHECK-NEXT: csel x10, x0, x1, hi
; CHECK-NEXT: cneg x8, x8, ls
+; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -191,10 +195,10 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x9, x1
-; CHECK-NEXT: sub x10, x1, x9
+; CHECK-NEXT: csel x10, x1, x9, gt
+; CHECK-NEXT: csel x11, x9, x1, gt
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: subs x11, x9, x1
-; CHECK-NEXT: csel x10, x11, x10, gt
+; CHECK-NEXT: sub x10, x11, x10
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -216,10 +220,10 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x0, x9
-; CHECK-NEXT: sub x10, x9, x0
+; CHECK-NEXT: csel x10, x9, x0, gt
+; CHECK-NEXT: csel x9, x0, x9, gt
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: subs x9, x0, x9
-; CHECK-NEXT: csel x9, x9, x10, gt
+; CHECK-NEXT: sub x9, x9, x10
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -242,10 +246,10 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x10, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x9, x10
-; CHECK-NEXT: sub x11, x10, x9
+; CHECK-NEXT: csel x11, x10, x9, gt
+; CHECK-NEXT: csel x10, x9, x10, gt
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: subs x10, x9, x10
-; CHECK-NEXT: csel x10, x10, x11, gt
+; CHECK-NEXT: sub x10, x10, x11
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -271,13 +275,14 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w9, w1
-; CHECK-NEXT: sxth w10, w0
+; CHECK-NEXT: sxth w9, w0
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w10, w9
-; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: cmp w9, w1, sxth
+; CHECK-NEXT: csel w9, w1, w0, gt
+; CHECK-NEXT: csel w10, w0, w1, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #15
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -294,13 +299,14 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w1, #0xffff
-; CHECK-NEXT: and w10, w0, #0xffff
+; CHECK-NEXT: and w9, w0, #0xffff
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w10, w9
-; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: cmp w9, w1, uxth
+; CHECK-NEXT: csel w9, w1, w0, hi
+; CHECK-NEXT: csel w10, w0, w1, hi
; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #15
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i16 %a1, %a2
@@ -319,14 +325,15 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w9, w1
-; CHECK-NEXT: ldrsh w10, [x0]
+; CHECK-NEXT: ldrsh w9, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w10, w9
-; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: cmp w9, w1, sxth
+; CHECK-NEXT: csel w10, w1, w9, gt
+; CHECK-NEXT: csel w11, w9, w1, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: madd w0, w9, w8, w10
+; CHECK-NEXT: sub w10, w11, w10
+; CHECK-NEXT: ubfx w10, w10, #1, #15
+; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
%a1 = load i16, ptr %a1_addr
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -346,10 +353,12 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: sxth w9, w0
; CHECK-NEXT: ldrsh w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w9, w10
-; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: csel w9, w10, w0, gt
+; CHECK-NEXT: csel w10, w0, w10, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #15
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%a2 = load i16, ptr %a2_addr
@@ -370,10 +379,12 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldrsh w9, [x0]
; CHECK-NEXT: ldrsh w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w10, w9, w10
-; CHECK-NEXT: cneg w10, w10, mi
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: csel w11, w10, w9, gt
+; CHECK-NEXT: csel w10, w9, w10, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: lsr w10, w10, #1
+; CHECK-NEXT: sub w10, w10, w11
+; CHECK-NEXT: ubfx w10, w10, #1, #15
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
%a1 = load i16, ptr %a1_addr
@@ -398,13 +409,14 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w9, w1
-; CHECK-NEXT: sxtb w10, w0
+; CHECK-NEXT: sxtb w9, w0
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w10, w9
-; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: cmp w9, w1, sxtb
+; CHECK-NEXT: csel w9, w1, w0, gt
+; CHECK-NEXT: csel w10, w0, w1, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #7
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i8 %a1, %a2 ; signed
@@ -421,13 +433,14 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w1, #0xff
-; CHECK-NEXT: and w10, w0, #0xff
+; CHECK-NEXT: and w9, w0, #0xff
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w10, w9
-; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: cmp w9, w1, uxtb
+; CHECK-NEXT: csel w9, w1, w0, hi
+; CHECK-NEXT: csel w10, w0, w1, hi
; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #7
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i8 %a1, %a2
@@ -446,14 +459,15 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
; CHECK-LABEL: scalar_i8_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w9, w1
-; CHECK-NEXT: ldrsb w10, [x0]
+; CHECK-NEXT: ldrsb w9, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w10, w9
-; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: cmp w9, w1, sxtb
+; CHECK-NEXT: csel w10, w1, w9, gt
+; CHECK-NEXT: csel w11, w9, w1, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: madd w0, w9, w8, w10
+; CHECK-NEXT: sub w10, w11, w10
+; CHECK-NEXT: ubfx w10, w10, #1, #7
+; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
%a1 = load i8, ptr %a1_addr
%t3 = icmp sgt i8 %a1, %a2 ; signed
@@ -473,10 +487,12 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: sxtb w9, w0
; CHECK-NEXT: ldrsb w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w9, w9, w10
-; CHECK-NEXT: cneg w9, w9, mi
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: csel w9, w10, w0, gt
+; CHECK-NEXT: csel w10, w0, w10, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #7
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%a2 = load i8, ptr %a2_addr
@@ -497,10 +513,12 @@ define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldrsb w9, [x0]
; CHECK-NEXT: ldrsb w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: subs w10, w9, w10
-; CHECK-NEXT: cneg w10, w10, mi
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: csel w11, w10, w9, gt
+; CHECK-NEXT: csel w10, w9, w10, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: lsr w10, w10, #1
+; CHECK-NEXT: sub w10, w10, w11
+; CHECK-NEXT: ubfx w10, w10, #1, #7
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
%a1 = load i8, ptr %a1_addr
diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll
index 94dceafb2b0dcd..e639d4b30d4c94 100644
--- a/llvm/test/CodeGen/RISCV/abds.ll
+++ b/llvm/test/CodeGen/RISCV/abds.ll
@@ -2348,27 +2348,31 @@ define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind {
define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: abd_select_i8:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: srai a1, a1, 24
-; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: srai a0, a0, 24
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: slli a2, a1, 24
+; RV32I-NEXT: srai a2, a2, 24
+; RV32I-NEXT: slli a3, a0, 24
+; RV32I-NEXT: srai a3, a3, 24
+; RV32I-NEXT: blt a3, a2, .LBB34_2
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB34_2:
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_select_i8:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a1, 56
-; RV64I-NEXT: srai a1, a1, 56
-; RV64I-NEXT: slli a0, a0, 56
-; RV64I-NEXT: srai a0, a0, 56
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: slli a2, a1, 56
+; RV64I-NEXT: srai a2, a2, 56
+; RV64I-NEXT: slli a3, a0, 56
+; RV64I-NEXT: srai a3, a3, 56
+; RV64I-NEXT: blt a3, a2, .LBB34_2
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB34_2:
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_select_i8:
; ZBB: # %bb.0:
@@ -2388,27 +2392,31 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_select_i16:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a1, a1, 16
-; RV32I-NEXT: srai a1, a1, 16
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: slli a2, a0, 16
+; RV32I-NEXT: srai a2, a2, 16
+; RV32I-NEXT: slli a3, a1, 16
+; RV32I-NEXT: srai a3, a3, 16
+; RV32I-NEXT: bge a3, a2, .LBB35_2
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB35_2:
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_select_i16:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a1, 48
-; RV64I-NEXT: srai a1, a1, 48
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: slli a2, a0, 48
+; RV64I-NEXT: srai a2, a2, 48
+; RV64I-NEXT: slli a3, a1, 48
+; RV64I-NEXT: srai a3, a3, 48
+; RV64I-NEXT: bge a3, a2, .LBB35_2
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB35_2:
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_select_i16:
; ZBB: # %bb.0:
@@ -2438,12 +2446,14 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
;
; RV64I-LABEL: abd_select_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sext.w a3, a1
+; RV64I-NEXT: blt a3, a2, .LBB36_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: subw a0, a1, a0
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB36_2:
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_select_i32:
@@ -2471,28 +2481,32 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_select_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: sltu a4, a2, a0
-; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: beq a1, a3, .LBB37_2
+; RV32I-NEXT: beq a1, a3, .LBB37_3
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slt a5, a3, a1
+; RV32I-NEXT: slt a4, a1, a3
+; RV32I-NEXT: bnez a4, .LBB37_4
; RV32I-NEXT: .LBB37_2:
-; RV32I-NEXT: bnez a5, .LBB37_4
-; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sub a1, a3, a1
-; RV32I-NEXT: sub a1, a1, a4
-; RV32I-NEXT: sub a0, a2, a0
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB37_4:
+; RV32I-NEXT: mv a4, a1
+; RV32I-NEXT: mv a5, a0
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: j .LBB37_5
+; RV32I-NEXT: .LBB37_3:
; RV32I-NEXT: sltu a4, a0, a2
-; RV32I-NEXT: sub a1, a1, a3
-; RV32I-NEXT: sub a1, a1, a4
-; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: beqz a4, .LBB37_2
+; RV32I-NEXT: .LBB37_4:
+; RV32I-NEXT: mv a4, a3
+; RV32I-NEXT: mv a5, a2
+; RV32I-NEXT: .LBB37_5:
+; RV32I-NEXT: sltu a2, a5, a0
+; RV32I-NEXT: sub a1, a4, a1
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: sub a0, a5, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_select_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: blt a1, a0, .LBB37_2
+; RV64I-NEXT: bge a0, a1, .LBB37_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
@@ -2537,98 +2551,97 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_select_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a4, 4(a1)
-; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw t0, 12(a1)
-; RV32I-NEXT: lw a7, 8(a2)
-; RV32I-NEXT: lw t1, 12(a2)
-; RV32I-NEXT: lw a5, 0(a2)
-; RV32I-NEXT: lw a1, 4(a2)
-; RV32I-NEXT: sltu a2, a7, a6
-; RV32I-NEXT: mv t4, a2
-; RV32I-NEXT: beq t0, t1, .LBB38_2
+; RV32I-NEXT: lw a7, 4(a2)
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw t0, 12(a2)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a4, 8(a1)
+; RV32I-NEXT: beq a5, t0, .LBB38_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slt t4, t1, t0
+; RV32I-NEXT: slt t1, a5, t0
+; RV32I-NEXT: j .LBB38_3
; RV32I-NEXT: .LBB38_2:
-; RV32I-NEXT: sltu t2, a5, a3
-; RV32I-NEXT: sltu t5, a1, a4
-; RV32I-NEXT: mv t3, t2
-; RV32I-NEXT: beq a4, a1, .LBB38_4
-; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: mv t3, t5
-; RV32I-NEXT: .LBB38_4:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: xor t6, t0, t1
-; RV32I-NEXT: xor s0, a6, a7
-; RV32I-NEXT: or t6, s0, t6
-; RV32I-NEXT: beqz t6, .LBB38_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: mv t3, t4
+; RV32I-NEXT: sltu t1, a4, a6
+; RV32I-NEXT: .LBB38_3:
+; RV32I-NEXT: lw t3, 0(a2)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: beq a3, a7, .LBB38_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: sltu a2, a3, a7
+; RV32I-NEXT: j .LBB38_6
+; RV32I-NEXT: .LBB38_5:
+; RV32I-NEXT: sltu a2, a1, t3
; RV32I-NEXT: .LBB38_6:
-; RV32I-NEXT: mv t4, t2
-; RV32I-NEXT: beq a1, a4, .LBB38_8
+; RV32I-NEXT: xor t2, a5, t0
+; RV32I-NEXT: xor t4, a4, a6
+; RV32I-NEXT: or t2, t4, t2
+; RV32I-NEXT: beqz t2, .LBB38_8
; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: mv t4, t5
+; RV32I-NEXT: mv a2, t1
; RV32I-NEXT: .LBB38_8:
-; RV32I-NEXT: sltu t5, a3, a5
-; RV32I-NEXT: mv t6, t5
-; RV32I-NEXT: beq a4, a1, .LBB38_10
+; RV32I-NEXT: bnez a2, .LBB38_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: sltu t6, a4, a1
+; RV32I-NEXT: mv a2, t3
+; RV32I-NEXT: mv t1, a7
+; RV32I-NEXT: mv t4, t0
+; RV32I-NEXT: mv t2, a6
+; RV32I-NEXT: j .LBB38_11
; RV32I-NEXT: .LBB38_10:
-; RV32I-NEXT: bnez t3, .LBB38_12
-; RV32I-NEXT: # %bb.11:
-; RV32I-NEXT: sub t0, t1, t0
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv t1, a3
+; RV32I-NEXT: mv t4, a5
+; RV32I-NEXT: mv t2, a4
+; RV32I-NEXT: mv a1, t3
+; RV32I-NEXT: mv a3, a7
+; RV32I-NEXT: mv a5, t0
+; RV32I-NEXT: mv a4, a6
+; RV32I-NEXT: .LBB38_11:
+; RV32I-NEXT: sltu a6, a4, t2
+; RV32I-NEXT: sub a7, a5, t4
+; RV32I-NEXT: sltu a5, a1, a2
; RV32I-NEXT: sub a6, a7, a6
-; RV32I-NEXT: sub a2, t0, a2
-; RV32I-NEXT: sltu a7, a6, t4
-; RV32I-NEXT: sub a2, a2, a7
-; RV32I-NEXT: sub a3, a5, a3
-; RV32I-NEXT: sub a1, a1, a4
-; RV32I-NEXT: sub a1, a1, t2
-; RV32I-NEXT: sub a4, a6, t4
-; RV32I-NEXT: j .LBB38_13
-; RV32I-NEXT: .LBB38_12:
-; RV32I-NEXT: sltu a2, a6, a7
-; RV32I-NEXT: sub t0, t0, t1
-; RV32I-NEXT: sub a2, t0, a2
-; RV32I-NEXT: sub a6, a6, a7
-; RV32I-NEXT: sltu a7, a6, t6
-; RV32I-NEXT: sub a2, a2, a7
-; RV32I-NEXT: sub a3, a3, a5
-; RV32I-NEXT: sub a4, a4, a1
-; RV32I-NEXT: sub a1, a4, t5
-; RV32I-NEXT: sub a4, a6, t6
+; RV32I-NEXT: mv a7, a5
+; RV32I-NEXT: beq a3, t1, .LBB38_13
+; RV32I-NEXT: # %bb.12:
+; RV32I-NEXT: sltu a7, a3, t1
; RV32I-NEXT: .LBB38_13:
+; RV32I-NEXT: sub a4, a4, t2
+; RV32I-NEXT: sltu t0, a4, a7
+; RV32I-NEXT: sub a6, a6, t0
+; RV32I-NEXT: sub a4, a4, a7
+; RV32I-NEXT: sub a3, a3, t1
+; RV32I-NEXT: sub a3, a3, a5
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
-; RV32I-NEXT: sw a3, 0(a0)
-; RV32I-NEXT: sw a2, 12(a0)
-; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: sw a6, 12(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_select_i128:
; RV64I: # %bb.0:
-; RV64I-NEXT: sltu a4, a2, a0
-; RV64I-NEXT: mv a5, a4
-; RV64I-NEXT: beq a1, a3, .LBB38_2
+; RV64I-NEXT: beq a1, a3, .LBB38_3
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: slt a5, a3, a1
+; RV64I-NEXT: slt a4, a1, a3
+; RV64I-NEXT: beqz a4, .LBB38_4
; RV64I-NEXT: .LBB38_2:
-; RV64I-NEXT: bnez a5, .LBB38_4
-; RV64I-NEXT: # %bb.3:
-; RV64I-NEXT: sub a1, a3, a1
-; RV64I-NEXT: sub a1, a1, a4
-; RV64I-NEXT: sub a0, a2, a0
-; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB38_4:
+; RV64I-NEXT: mv a4, a1
+; RV64I-NEXT: mv a5, a0
+; RV64I-NEXT: mv a1, a3
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: j .LBB38_5
+; RV64I-NEXT: .LBB38_3:
; RV64I-NEXT: sltu a4, a0, a2
-; RV64I-NEXT: sub a1, a1, a3
+; RV64I-NEXT: bnez a4, .LBB38_2
+; RV64I-NEXT: .LBB38_4:
+; RV64I-NEXT: mv a4, a3
+; RV64I-NEXT: mv a5, a2
+; RV64I-NEXT: .LBB38_5:
+; RV64I-NEXT: sltu a2, a0, a5
; RV64I-NEXT: sub a1, a1, a4
-; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a5
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_select_i128:
diff --git a/llvm/test/CodeGen/RISCV/abdu.ll b/llvm/test/CodeGen/RISCV/abdu.ll
index 814bca98523ce8..a04a800157dbb1 100644
--- a/llvm/test/CodeGen/RISCV/abdu.ll
+++ b/llvm/test/CodeGen/RISCV/abdu.ll
@@ -1725,25 +1725,17 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
;
define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
-; RV32I-LABEL: abd_select_i8:
-; RV32I: # %bb.0:
-; RV32I-NEXT: andi a1, a1, 255
-; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: xor a0, a0, a1
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: ret
-;
-; RV64I-LABEL: abd_select_i8:
-; RV64I: # %bb.0:
-; RV64I-NEXT: andi a1, a1, 255
-; RV64I-NEXT: andi a0, a0, 255
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: ret
+; NOZBB-LABEL: abd_select_i8:
+; NOZBB: # %bb.0:
+; NOZBB-NEXT: andi a2, a1, 255
+; NOZBB-NEXT: andi a3, a0, 255
+; NOZBB-NEXT: bltu a3, a2, .LBB23_2
+; NOZBB-NEXT: # %bb.1:
+; NOZBB-NEXT: sub a0, a0, a1
+; NOZBB-NEXT: ret
+; NOZBB-NEXT: .LBB23_2:
+; NOZBB-NEXT: sub a0, a1, a0
+; NOZBB-NEXT: ret
;
; ZBB-LABEL: abd_select_i8:
; ZBB: # %bb.0:
@@ -1765,25 +1757,29 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: lui a2, 16
; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: sub a0, a0, a1
-; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: and a3, a0, a2
+; RV32I-NEXT: and a2, a1, a2
+; RV32I-NEXT: bgeu a2, a3, .LBB24_2
+; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB24_2:
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_select_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -1
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: and a3, a0, a2
+; RV64I-NEXT: and a2, a1, a2
+; RV64I-NEXT: bgeu a2, a3, .LBB24_2
+; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB24_2:
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_select_i16:
; ZBB: # %bb.0:
@@ -1813,14 +1809,14 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
;
; RV64I-LABEL: abd_select_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: srli a1, a1, 32
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: sext.w a2, a0
+; RV64I-NEXT: sext.w a3, a1
+; RV64I-NEXT: bltu a3, a2, .LBB25_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: subw a0, a1, a0
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB25_2:
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_select_i32:
@@ -1850,29 +1846,32 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_select_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: sltu a4, a0, a2
-; RV32I-NEXT: sub a3, a1, a3
-; RV32I-NEXT: sub a3, a3, a4
-; RV32I-NEXT: sub a2, a0, a2
-; RV32I-NEXT: beq a3, a1, .LBB26_2
+; RV32I-NEXT: beq a1, a3, .LBB26_3
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: j .LBB26_3
+; RV32I-NEXT: sltu a4, a1, a3
+; RV32I-NEXT: bnez a4, .LBB26_4
; RV32I-NEXT: .LBB26_2:
-; RV32I-NEXT: sltu a0, a0, a2
+; RV32I-NEXT: mv a4, a1
+; RV32I-NEXT: mv a5, a0
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: j .LBB26_5
; RV32I-NEXT: .LBB26_3:
-; RV32I-NEXT: neg a1, a0
-; RV32I-NEXT: xor a2, a2, a1
-; RV32I-NEXT: sltu a4, a2, a1
-; RV32I-NEXT: xor a1, a3, a1
-; RV32I-NEXT: add a1, a1, a0
-; RV32I-NEXT: sub a1, a1, a4
-; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: beqz a4, .LBB26_2
+; RV32I-NEXT: .LBB26_4:
+; RV32I-NEXT: mv a4, a3
+; RV32I-NEXT: mv a5, a2
+; RV32I-NEXT: .LBB26_5:
+; RV32I-NEXT: sltu a2, a5, a0
+; RV32I-NEXT: sub a1, a4, a1
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: sub a0, a5, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_select_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: bltu a1, a0, .LBB26_2
+; RV64I-NEXT: bgeu a0, a1, .LBB26_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
@@ -1918,98 +1917,97 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_select_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a5, 4(a2)
+; RV32I-NEXT: lw a7, 4(a2)
; RV32I-NEXT: lw a6, 8(a2)
-; RV32I-NEXT: lw a7, 12(a2)
-; RV32I-NEXT: lw a2, 8(a1)
-; RV32I-NEXT: lw a4, 12(a1)
-; RV32I-NEXT: lw t0, 0(a1)
-; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu t1, a2, a6
-; RV32I-NEXT: sub a7, a4, a7
-; RV32I-NEXT: sltu t2, t0, a3
-; RV32I-NEXT: sub a7, a7, t1
-; RV32I-NEXT: mv t1, t2
-; RV32I-NEXT: beq a1, a5, .LBB27_2
+; RV32I-NEXT: lw t0, 12(a2)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a4, 8(a1)
+; RV32I-NEXT: beq a5, t0, .LBB27_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a1, a5
+; RV32I-NEXT: sltu t1, a5, t0
+; RV32I-NEXT: j .LBB27_3
; RV32I-NEXT: .LBB27_2:
-; RV32I-NEXT: sub t3, a2, a6
-; RV32I-NEXT: sltu a6, t3, t1
-; RV32I-NEXT: sub a6, a7, a6
-; RV32I-NEXT: sub a7, t3, t1
-; RV32I-NEXT: beq a6, a4, .LBB27_4
-; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: sltu t1, a4, a6
-; RV32I-NEXT: j .LBB27_5
-; RV32I-NEXT: .LBB27_4:
-; RV32I-NEXT: sltu t1, a2, a7
+; RV32I-NEXT: .LBB27_3:
+; RV32I-NEXT: lw t3, 0(a2)
+; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: beq a3, a7, .LBB27_5
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: sltu a2, a3, a7
+; RV32I-NEXT: j .LBB27_6
; RV32I-NEXT: .LBB27_5:
-; RV32I-NEXT: sub a5, a1, a5
-; RV32I-NEXT: sub a5, a5, t2
-; RV32I-NEXT: sub a3, t0, a3
-; RV32I-NEXT: beq a5, a1, .LBB27_7
-; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: sltu a1, a1, a5
-; RV32I-NEXT: j .LBB27_8
-; RV32I-NEXT: .LBB27_7:
-; RV32I-NEXT: sltu a1, t0, a3
+; RV32I-NEXT: sltu a2, a1, t3
+; RV32I-NEXT: .LBB27_6:
+; RV32I-NEXT: xor t2, a5, t0
+; RV32I-NEXT: xor t4, a4, a6
+; RV32I-NEXT: or t2, t4, t2
+; RV32I-NEXT: beqz t2, .LBB27_8
+; RV32I-NEXT: # %bb.7:
+; RV32I-NEXT: mv a2, t1
; RV32I-NEXT: .LBB27_8:
-; RV32I-NEXT: xor a4, a6, a4
-; RV32I-NEXT: xor a2, a7, a2
-; RV32I-NEXT: or a2, a2, a4
-; RV32I-NEXT: beqz a2, .LBB27_10
+; RV32I-NEXT: bnez a2, .LBB27_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: mv a1, t1
+; RV32I-NEXT: mv a2, t3
+; RV32I-NEXT: mv t1, a7
+; RV32I-NEXT: mv t4, t0
+; RV32I-NEXT: mv t2, a6
+; RV32I-NEXT: j .LBB27_11
; RV32I-NEXT: .LBB27_10:
-; RV32I-NEXT: neg t0, a1
-; RV32I-NEXT: xor a2, a7, t0
-; RV32I-NEXT: sltu a4, a2, t0
-; RV32I-NEXT: xor a6, a6, t0
-; RV32I-NEXT: add a6, a6, a1
-; RV32I-NEXT: sub a4, a6, a4
-; RV32I-NEXT: xor a3, a3, t0
-; RV32I-NEXT: sltu a6, a3, t0
-; RV32I-NEXT: xor a7, a5, t0
-; RV32I-NEXT: mv t1, a6
-; RV32I-NEXT: beqz a5, .LBB27_12
-; RV32I-NEXT: # %bb.11:
-; RV32I-NEXT: sltu t1, a7, t0
-; RV32I-NEXT: .LBB27_12:
-; RV32I-NEXT: add a2, a2, a1
-; RV32I-NEXT: sltu a5, a2, t1
-; RV32I-NEXT: sub a4, a4, a5
-; RV32I-NEXT: sub a2, a2, t1
-; RV32I-NEXT: add a7, a7, a1
-; RV32I-NEXT: sub a5, a7, a6
-; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv t1, a3
+; RV32I-NEXT: mv t4, a5
+; RV32I-NEXT: mv t2, a4
+; RV32I-NEXT: mv a1, t3
+; RV32I-NEXT: mv a3, a7
+; RV32I-NEXT: mv a5, t0
+; RV32I-NEXT: mv a4, a6
+; RV32I-NEXT: .LBB27_11:
+; RV32I-NEXT: sltu a6, a4, t2
+; RV32I-NEXT: sub a7, a5, t4
+; RV32I-NEXT: sltu a5, a1, a2
+; RV32I-NEXT: sub a6, a7, a6
+; RV32I-NEXT: mv a7, a5
+; RV32I-NEXT: beq a3, t1, .LBB27_13
+; RV32I-NEXT: # %bb.12:
+; RV32I-NEXT: sltu a7, a3, t1
+; RV32I-NEXT: .LBB27_13:
+; RV32I-NEXT: sub a4, a4, t2
+; RV32I-NEXT: sltu t0, a4, a7
+; RV32I-NEXT: sub a6, a6, t0
+; RV32I-NEXT: sub a4, a4, a7
+; RV32I-NEXT: sub a3, a3, t1
+; RV32I-NEXT: sub a3, a3, a5
+; RV32I-NEXT: sub a1, a1, a2
; RV32I-NEXT: sw a1, 0(a0)
-; RV32I-NEXT: sw a5, 4(a0)
-; RV32I-NEXT: sw a2, 8(a0)
-; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
+; RV32I-NEXT: sw a6, 12(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_select_i128:
; RV64I: # %bb.0:
-; RV64I-NEXT: sltu a4, a0, a2
-; RV64I-NEXT: sub a3, a1, a3
-; RV64I-NEXT: sub a3, a3, a4
-; RV64I-NEXT: sub a2, a0, a2
-; RV64I-NEXT: beq a3, a1, .LBB27_2
+; RV64I-NEXT: beq a1, a3, .LBB27_3
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: sltu a0, a1, a3
-; RV64I-NEXT: j .LBB27_3
+; RV64I-NEXT: sltu a4, a1, a3
+; RV64I-NEXT: beqz a4, .LBB27_4
; RV64I-NEXT: .LBB27_2:
-; RV64I-NEXT: sltu a0, a0, a2
+; RV64I-NEXT: mv a4, a1
+; RV64I-NEXT: mv a5, a0
+; RV64I-NEXT: mv a1, a3
+; RV64I-NEXT: mv a0, a2
+; RV64I-NEXT: j .LBB27_5
; RV64I-NEXT: .LBB27_3:
-; RV64I-NEXT: neg a1, a0
-; RV64I-NEXT: xor a2, a2, a1
-; RV64I-NEXT: sltu a4, a2, a1
-; RV64I-NEXT: xor a1, a3, a1
-; RV64I-NEXT: add a1, a1, a0
+; RV64I-NEXT: sltu a4, a0, a2
+; RV64I-NEXT: bnez a4, .LBB27_2
+; RV64I-NEXT: .LBB27_4:
+; RV64I-NEXT: mv a4, a3
+; RV64I-NEXT: mv a5, a2
+; RV64I-NEXT: .LBB27_5:
+; RV64I-NEXT: sltu a2, a0, a5
; RV64I-NEXT: sub a1, a1, a4
-; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a5
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_select_i128:
@@ -2131,4 +2129,3 @@ declare i32 @llvm.umin.i32(i32, i32)
declare i64 @llvm.umin.i64(i64, i64)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
-; NOZBB: {{.*}}
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index 0356c2702a4199..4c524c28b160ab 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -1161,23 +1161,24 @@ define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind {
define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_select_i8:
; X86: # %bb.0:
-; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb %cl, %al
; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovll %edx, %eax
+; X86-NEXT: cmovll %eax, %edx
+; X86-NEXT: cmovll %ecx, %eax
+; X86-NEXT: subb %dl, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i8:
; X64: # %bb.0:
-; X64-NEXT: movsbl %sil, %eax
-; X64-NEXT: movsbl %dil, %ecx
-; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: subl %eax, %edx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: cmovll %edx, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cmpb %sil, %al
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: cmovll %edi, %ecx
+; X64-NEXT: cmovll %esi, %eax
+; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%cmp = icmp slt i8 %a, %b
@@ -1190,23 +1191,24 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_select_i16:
; X86: # %bb.0:
-; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpw %cx, %ax
; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovll %edx, %eax
+; X86-NEXT: cmovlel %eax, %edx
+; X86-NEXT: cmovlel %ecx, %eax
+; X86-NEXT: subl %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i16:
; X64: # %bb.0:
-; X64-NEXT: movswl %si, %eax
-; X64-NEXT: movswl %di, %ecx
-; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: subl %eax, %edx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cmpw %si, %ax
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: cmovlel %edi, %ecx
+; X64-NEXT: cmovlel %esi, %eax
; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: cmovll %edx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%cmp = icmp sle i16 %a, %b
@@ -1219,20 +1221,22 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_select_i32:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovll %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl %ecx, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cmovgl %edx, %eax
+; X86-NEXT: cmovgl %ecx, %edx
+; X86-NEXT: subl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: subl %edi, %esi
-; X64-NEXT: cmovgel %esi, %eax
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: cmovgl %edi, %eax
+; X64-NEXT: cmovgl %esi, %edi
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -1247,18 +1251,21 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: subl %eax, %edi
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: cmovll %edi, %eax
-; X86-NEXT: cmovll %ebx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %esi, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: cmovgel %ecx, %edx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: cmovgel %ebx, %eax
+; X86-NEXT: cmovgel %edi, %ecx
+; X86-NEXT: cmovgel %esi, %ebx
+; X86-NEXT: subl %ebx, %eax
+; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -1266,10 +1273,11 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_select_i64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: subq %rdi, %rsi
-; X64-NEXT: cmovgeq %rsi, %rax
+; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: cmovgeq %rdi, %rax
+; X64-NEXT: cmovgeq %rsi, %rdi
+; X64-NEXT: subq %rdi, %rax
; X64-NEXT: retq
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -1286,32 +1294,42 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebp, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovll %ebx, %esi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovll %edi, %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: cmovll %ebx, %edi
+; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: cmovll %ecx, %ebx
; X86-NEXT: cmovll %ebp, %ecx
-; X86-NEXT: cmovll %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: cmovll %esi, %ebp
+; X86-NEXT: cmovll %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmovll %edx, %eax
+; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: sbbl %ebp, %esi
+; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -1322,14 +1340,18 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: abd_select_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: subq %rdx, %rax
-; X64-NEXT: movq %rsi, %r8
-; X64-NEXT: sbbq %rcx, %r8
-; X64-NEXT: subq %rdi, %rdx
-; X64-NEXT: sbbq %rsi, %rcx
-; X64-NEXT: cmovgeq %rdx, %rax
-; X64-NEXT: cmovgeq %rcx, %r8
-; X64-NEXT: movq %r8, %rdx
+; X64-NEXT: cmpq %rdx, %rdi
+; X64-NEXT: movq %rsi, %rdi
+; X64-NEXT: sbbq %rcx, %rdi
+; X64-NEXT: movq %rcx, %rdi
+; X64-NEXT: cmovlq %rsi, %rdi
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: cmovlq %rax, %r8
+; X64-NEXT: cmovlq %rcx, %rsi
+; X64-NEXT: cmovlq %rdx, %rax
+; X64-NEXT: subq %r8, %rax
+; X64-NEXT: sbbq %rdi, %rsi
+; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: retq
%cmp = icmp slt i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 27acec32fd3489..fe9006a8aec234 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -775,23 +775,24 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_select_i8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb %cl, %al
; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovbl %edx, %eax
+; X86-NEXT: cmovbl %eax, %edx
+; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: subb %dl, %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i8:
; X64: # %bb.0:
-; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: subl %eax, %edx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: cmovbl %edx, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cmpb %sil, %al
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: cmovbl %edi, %ecx
+; X64-NEXT: cmovbl %esi, %eax
+; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%cmp = icmp ult i8 %a, %b
@@ -804,23 +805,24 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_select_i16:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpw %cx, %ax
; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovbl %edx, %eax
+; X86-NEXT: cmovbel %eax, %edx
+; X86-NEXT: cmovbel %ecx, %eax
+; X86-NEXT: subl %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i16:
; X64: # %bb.0:
-; X64-NEXT: movzwl %si, %eax
-; X64-NEXT: movzwl %di, %ecx
-; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: subl %eax, %edx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cmpw %si, %ax
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: cmovbel %edi, %ecx
+; X64-NEXT: cmovbel %esi, %eax
; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%cmp = icmp ule i16 %a, %b
@@ -833,20 +835,22 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_select_i32:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: subl %eax, %edx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovbl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl %ecx, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cmoval %edx, %eax
+; X86-NEXT: cmoval %ecx, %edx
+; X86-NEXT: subl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: subl %edi, %esi
-; X64-NEXT: cmovael %esi, %eax
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: cmoval %edi, %eax
+; X64-NEXT: cmoval %esi, %edi
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
%cmp = icmp ugt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -858,24 +862,36 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_select_i64:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %esi, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: cmovael %ecx, %edx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: cmovael %ebx, %eax
+; X86-NEXT: cmovael %edi, %ecx
+; X86-NEXT: cmovael %esi, %ebx
+; X86-NEXT: subl %ebx, %eax
; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: abd_select_i64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: subq %rdi, %rsi
-; X64-NEXT: cmovaeq %rsi, %rax
+; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: cmovaeq %rdi, %rax
+; X64-NEXT: cmovaeq %rsi, %rdi
+; X64-NEXT: subq %rdi, %rax
; X64-NEXT: retq
%cmp = icmp uge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -887,47 +903,67 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; X86-LABEL: abd_select_i128:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sbbl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: sbbl %ebx, %eax
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: cmovbl %edi, %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: cmovbl %ebx, %edi
+; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: cmovbl %ecx, %ebx
+; X86-NEXT: cmovbl %ebp, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: xorl %ebx, %edx
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: subl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: cmovbl %esi, %ebp
+; X86-NEXT: cmovbl %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmovbl %edx, %eax
+; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: sbbl %ebp, %esi
; X86-NEXT: sbbl %ebx, %ecx
-; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: abd_select_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: subq %rdx, %rax
-; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: sbbq %rdi, %rdi
-; X64-NEXT: xorq %rdi, %rsi
-; X64-NEXT: xorq %rdi, %rax
-; X64-NEXT: subq %rdi, %rax
+; X64-NEXT: cmpq %rdx, %rdi
+; X64-NEXT: movq %rsi, %rdi
+; X64-NEXT: sbbq %rcx, %rdi
+; X64-NEXT: movq %rcx, %rdi
+; X64-NEXT: cmovbq %rsi, %rdi
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: cmovbq %rax, %r8
+; X64-NEXT: cmovbq %rcx, %rsi
+; X64-NEXT: cmovbq %rdx, %rax
+; X64-NEXT: subq %r8, %rax
; X64-NEXT: sbbq %rdi, %rsi
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll
index 5ffee3fa6bda4b..601166d67f6f27 100644
--- a/llvm/test/CodeGen/X86/midpoint-int.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int.ll
@@ -14,34 +14,37 @@
define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_signed_reg_reg:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: setle %cl
-; X64-NEXT: leal -1(%rcx,%rcx), %ecx
-; X64-NEXT: subl %edi, %esi
-; X64-NEXT: cmovgel %esi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setle %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: cmovgl %esi, %ecx
+; X64-NEXT: cmovgl %edi, %esi
+; X64-NEXT: subl %ecx, %esi
+; X64-NEXT: shrl %esi
+; X64-NEXT: imull %esi, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: retq
;
; X86-LABEL: scalar_i32_signed_reg_reg:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: setle %al
-; X86-NEXT: leal -1(%eax,%eax), %edx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setle %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: jg .LBB0_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB0_3
+; X86-NEXT: .LBB0_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: .LBB0_3:
; X86-NEXT: subl %esi, %eax
-; X86-NEXT: jg .LBB0_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: subl %ecx, %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: .LBB0_2:
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
@@ -61,41 +64,41 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_unsigned_reg_reg:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: cmpl %edi, %esi
-; X64-NEXT: sbbl %ecx, %ecx
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: subl %edi, %esi
-; X64-NEXT: cmovael %esi, %eax
-; X64-NEXT: orl $1, %ecx
-; X64-NEXT: shrl %eax
-; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setbe %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: cmoval %esi, %ecx
+; X64-NEXT: cmoval %edi, %esi
+; X64-NEXT: subl %ecx, %esi
+; X64-NEXT: shrl %esi
+; X64-NEXT: imull %esi, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: retq
;
; X86-LABEL: scalar_i32_unsigned_reg_reg:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: subl %ecx, %esi
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orl $1, %edx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setbe %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: ja .LBB1_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB1_3
+; X86-NEXT: .LBB1_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: subl %edi, %eax
-; X86-NEXT: ja .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: .LBB1_2:
+; X86-NEXT: .LBB1_3:
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
%t3 = icmp ugt i32 %a1, %a2
%t4 = select i1 %t3, i32 -1, i32 1
@@ -114,37 +117,40 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_signed_mem_reg:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: setle %dl
-; X64-NEXT: leal -1(%rdx,%rdx), %edx
-; X64-NEXT: subl %ecx, %esi
-; X64-NEXT: cmovgel %esi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: imull %edx, %eax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %esi, %ecx
+; X64-NEXT: setle %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
+; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: cmovgl %esi, %edx
+; X64-NEXT: cmovgl %ecx, %esi
+; X64-NEXT: subl %edx, %esi
+; X64-NEXT: shrl %esi
+; X64-NEXT: imull %esi, %eax
; X64-NEXT: addl %ecx, %eax
; X64-NEXT: retq
;
; X86-LABEL: scalar_i32_signed_mem_reg:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: setle %al
-; X86-NEXT: leal -1(%eax,%eax), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setle %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: jg .LBB2_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB2_3
+; X86-NEXT: .LBB2_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: jg .LBB2_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB2_2:
+; X86-NEXT: .LBB2_3:
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: shrl %eax
-; X86-NEXT: imull %esi, %eax
+; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -165,12 +171,13 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; X64: # %bb.0:
; X64-NEXT: movl (%rsi), %eax
; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: movl %edi, %edx
-; X64-NEXT: subl %eax, %edx
+; X64-NEXT: cmpl %eax, %edi
; X64-NEXT: setle %cl
; X64-NEXT: leal -1(%rcx,%rcx), %ecx
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: cmovll %edx, %eax
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: cmovgl %eax, %edx
+; X64-NEXT: cmovgl %edi, %eax
+; X64-NEXT: subl %edx, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: addl %edi, %eax
@@ -181,18 +188,20 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: setle %al
-; X86-NEXT: leal -1(%eax,%eax), %edx
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setle %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: jg .LBB3_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB3_3
+; X86-NEXT: .LBB3_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: .LBB3_3:
; X86-NEXT: subl %esi, %eax
-; X86-NEXT: jg .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: subl %ecx, %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: .LBB3_2:
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
@@ -216,12 +225,13 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %eax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: movl %ecx, %esi
-; X64-NEXT: subl %eax, %esi
+; X64-NEXT: cmpl %eax, %ecx
; X64-NEXT: setle %dl
; X64-NEXT: leal -1(%rdx,%rdx), %edx
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: cmovll %esi, %eax
+; X64-NEXT: movl %ecx, %esi
+; X64-NEXT: cmovgl %eax, %esi
+; X64-NEXT: cmovgl %ecx, %eax
+; X64-NEXT: subl %esi, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: imull %edx, %eax
; X64-NEXT: addl %ecx, %eax
@@ -233,18 +243,20 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: setle %al
-; X86-NEXT: leal -1(%eax,%eax), %edx
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setle %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: jg .LBB4_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB4_3
+; X86-NEXT: .LBB4_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: .LBB4_3:
; X86-NEXT: subl %esi, %eax
-; X86-NEXT: jg .LBB4_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: subl %ecx, %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: .LBB4_2:
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
@@ -272,15 +284,16 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_signed_reg_reg:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: setle %cl
-; X64-NEXT: leaq -1(%rcx,%rcx), %rcx
-; X64-NEXT: subq %rdi, %rsi
-; X64-NEXT: cmovgeq %rsi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: imulq %rcx, %rax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: setle %al
+; X64-NEXT: leaq -1(%rax,%rax), %rax
+; X64-NEXT: movq %rdi, %rcx
+; X64-NEXT: cmovgq %rsi, %rcx
+; X64-NEXT: cmovgq %rdi, %rsi
+; X64-NEXT: subq %rcx, %rsi
+; X64-NEXT: shrq %rsi
+; X64-NEXT: imulq %rsi, %rax
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: retq
;
@@ -290,37 +303,41 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setl %al
-; X86-NEXT: movzbl %al, %edi
-; X86-NEXT: negl %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: orl $1, %ebx
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: subl %edx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: subl %esi, %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: jl .LBB5_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: .LBB5_2:
-; X86-NEXT: shrdl $1, %ebp, %eax
-; X86-NEXT: shrl %ebp
-; X86-NEXT: imull %eax, %edi
-; X86-NEXT: mull %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ebp, %edx
+; X86-NEXT: setl %dl
+; X86-NEXT: movzbl %dl, %ebx
+; X86-NEXT: jl .LBB5_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movl %ebp, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: jmp .LBB5_3
+; X86-NEXT: .LBB5_1:
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movl %ebp, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: .LBB5_3:
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: orl $1, %ebp
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: shrdl $1, %edi, %eax
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: shrl %edi
+; X86-NEXT: imull %ebp, %edi
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: imull %ebx, %ebp
-; X86-NEXT: addl %ebp, %edx
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -340,16 +357,16 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_unsigned_reg_reg:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: cmpq %rdi, %rsi
-; X64-NEXT: sbbq %rcx, %rcx
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: subq %rdi, %rsi
-; X64-NEXT: cmovaeq %rsi, %rax
-; X64-NEXT: orq $1, %rcx
-; X64-NEXT: shrq %rax
-; X64-NEXT: imulq %rcx, %rax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: setbe %al
+; X64-NEXT: leaq -1(%rax,%rax), %rax
+; X64-NEXT: movq %rdi, %rcx
+; X64-NEXT: cmovaq %rsi, %rcx
+; X64-NEXT: cmovaq %rdi, %rsi
+; X64-NEXT: subq %rcx, %rsi
+; X64-NEXT: shrq %rsi
+; X64-NEXT: imulq %rsi, %rax
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: retq
;
@@ -360,34 +377,39 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl %eax, %ebp
-; X86-NEXT: sbbl %edi, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: movl $0, %ebx
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ebp, %edx
+; X86-NEXT: setb %dl
; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: orl $1, %edi
+; X86-NEXT: testb %dl, %dl
+; X86-NEXT: jne .LBB6_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movl %ebp, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: jmp .LBB6_3
+; X86-NEXT: .LBB6_1:
+; X86-NEXT: movl %edi, %edx
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: subl %ebp, %esi
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: subl %edx, %esi
-; X86-NEXT: sbbl %edx, %eax
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: shldl $31, %esi, %eax
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movl %ebp, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: .LBB6_3:
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: orl $1, %ebp
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: shrdl $1, %edi, %eax
; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: mull %edi
+; X86-NEXT: mull %ebp
; X86-NEXT: addl %ebx, %edx
-; X86-NEXT: shrl %ebp
-; X86-NEXT: imull %edi, %ebp
-; X86-NEXT: addl %ebp, %edx
+; X86-NEXT: shrl %edi
+; X86-NEXT: imull %ebp, %edi
+; X86-NEXT: addl %edi, %edx
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: popl %esi
@@ -412,15 +434,16 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_signed_mem_reg:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: setle %dl
-; X64-NEXT: leaq -1(%rdx,%rdx), %rdx
-; X64-NEXT: subq %rcx, %rsi
-; X64-NEXT: cmovgeq %rsi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: imulq %rdx, %rax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpq %rsi, %rcx
+; X64-NEXT: setle %al
+; X64-NEXT: leaq -1(%rax,%rax), %rax
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: cmovgq %rsi, %rdx
+; X64-NEXT: cmovgq %rcx, %rsi
+; X64-NEXT: subq %rdx, %rsi
+; X64-NEXT: shrq %rsi
+; X64-NEXT: imulq %rsi, %rax
; X64-NEXT: addq %rcx, %rax
; X64-NEXT: retq
;
@@ -430,45 +453,43 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $12, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %ebx
-; X86-NEXT: movl 4(%eax), %esi
-; X86-NEXT: cmpl %ebx, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: setl %al
-; X86-NEXT: movzbl %al, %edi
-; X86-NEXT: negl %edi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %esi
+; X86-NEXT: movl 4(%ecx), %ecx
+; X86-NEXT: cmpl %esi, %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: setl %dl
+; X86-NEXT: movzbl %dl, %ebx
+; X86-NEXT: jl .LBB7_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: jmp .LBB7_3
+; X86-NEXT: .LBB7_1:
+; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: .LBB7_3:
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: orl $1, %ebp
; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl %ebx, %edx
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: jl .LBB7_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: .LBB7_2:
-; X86-NEXT: shrdl $1, %ebp, %eax
-; X86-NEXT: shrl %ebp
-; X86-NEXT: imull %eax, %edi
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X86-NEXT: mull %ecx
+; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT: shrdl $1, %edi, %eax
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: shrl %edi
+; X86-NEXT: imull %ebp, %edi
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: imull %ecx, %ebp
-; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: addl $12, %esp
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -491,12 +512,13 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; X64: # %bb.0:
; X64-NEXT: movq (%rsi), %rax
; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: movq %rdi, %rdx
-; X64-NEXT: subq %rax, %rdx
+; X64-NEXT: cmpq %rax, %rdi
; X64-NEXT: setle %cl
; X64-NEXT: leaq -1(%rcx,%rcx), %rcx
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: cmovlq %rdx, %rax
+; X64-NEXT: movq %rdi, %rdx
+; X64-NEXT: cmovgq %rax, %rdx
+; X64-NEXT: cmovgq %rdi, %rax
+; X64-NEXT: subq %rdx, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: imulq %rcx, %rax
; X64-NEXT: addq %rdi, %rax
@@ -508,38 +530,42 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl (%edx), %eax
+; X86-NEXT: movl 4(%edx), %edi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ebp, %edx
+; X86-NEXT: setl %dl
+; X86-NEXT: movzbl %dl, %ebx
+; X86-NEXT: jl .LBB8_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movl %ebp, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: jmp .LBB8_3
+; X86-NEXT: .LBB8_1:
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movl %ebp, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setl %al
-; X86-NEXT: movzbl %al, %edi
-; X86-NEXT: negl %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: orl $1, %ebx
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: subl %esi, %edx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: jl .LBB8_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: .LBB8_2:
-; X86-NEXT: shrdl $1, %ebp, %eax
-; X86-NEXT: shrl %ebp
-; X86-NEXT: imull %eax, %edi
-; X86-NEXT: mull %ebx
+; X86-NEXT: .LBB8_3:
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: orl $1, %ebp
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: shrdl $1, %edi, %eax
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: shrl %edi
+; X86-NEXT: imull %ebp, %edi
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: imull %ebx, %ebp
-; X86-NEXT: addl %ebp, %edx
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -563,12 +589,13 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: movq %rcx, %rsi
-; X64-NEXT: subq %rax, %rsi
+; X64-NEXT: cmpq %rax, %rcx
; X64-NEXT: setle %dl
; X64-NEXT: leaq -1(%rdx,%rdx), %rdx
-; X64-NEXT: subq %rcx, %rax
-; X64-NEXT: cmovlq %rsi, %rax
+; X64-NEXT: movq %rcx, %rsi
+; X64-NEXT: cmovgq %rax, %rsi
+; X64-NEXT: cmovgq %rcx, %rax
+; X64-NEXT: subq %rsi, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: imulq %rdx, %rax
; X64-NEXT: addq %rcx, %rax
@@ -580,46 +607,44 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $12, %esp
+; X86-NEXT: pushl %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %ebx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl (%eax), %esi
; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: cmpl %ebx, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: setl %al
-; X86-NEXT: movzbl %al, %edi
-; X86-NEXT: negl %edi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: orl $1, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl (%edx), %eax
+; X86-NEXT: movl 4(%edx), %edi
+; X86-NEXT: cmpl %esi, %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: setl %dl
+; X86-NEXT: movzbl %dl, %ebx
+; X86-NEXT: jl .LBB9_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: jmp .LBB9_3
+; X86-NEXT: .LBB9_1:
+; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: .LBB9_3:
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: orl $1, %ebp
; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: subl %ebx, %edx
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: jl .LBB9_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: movl %ecx, %ebp
-; X86-NEXT: .LBB9_2:
-; X86-NEXT: shrdl $1, %ebp, %eax
-; X86-NEXT: shrl %ebp
-; X86-NEXT: imull %eax, %edi
-; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X86-NEXT: mull %ecx
+; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT: shrdl $1, %edi, %eax
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: shrl %edi
+; X86-NEXT: imull %ebp, %edi
; X86-NEXT: addl %edi, %edx
-; X86-NEXT: imull %ecx, %ebp
-; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: addl $12, %esp
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -652,11 +677,9 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-NEXT: setle %al
; X64-NEXT: leal -1(%rax,%rax), %ecx
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: movswl %di, %edx
-; X64-NEXT: movswl %si, %esi
-; X64-NEXT: subl %edx, %esi
-; X64-NEXT: cmovll %eax, %esi
+; X64-NEXT: cmovgl %esi, %eax
+; X64-NEXT: cmovgl %edi, %esi
+; X64-NEXT: subl %eax, %esi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: imull %ecx, %eax
@@ -666,25 +689,28 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
;
; X86-LABEL: scalar_i16_signed_reg_reg:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: setle %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: jg .LBB10_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB10_3
+; X86-NEXT: .LBB10_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: subw %dx, %ax
-; X86-NEXT: jg .LBB10_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: negl %eax
-; X86-NEXT: .LBB10_2:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpw %dx, %cx
-; X86-NEXT: setle %bl
-; X86-NEXT: leal -1(%ebx,%ebx), %edx
+; X86-NEXT: .LBB10_3:
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %esi
; X86-NEXT: retl
%t3 = icmp sgt i16 %a1, %a2 ; signed
%t4 = select i1 %t3, i16 -1, i16 1
@@ -700,16 +726,14 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_unsigned_reg_reg:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: cmpw %di, %si
-; X64-NEXT: sbbl %ecx, %ecx
-; X64-NEXT: orl $1, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpw %si, %di
+; X64-NEXT: setbe %al
+; X64-NEXT: leal -1(%rax,%rax), %ecx
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: movzwl %di, %edx
-; X64-NEXT: movzwl %si, %esi
-; X64-NEXT: subl %edx, %esi
-; X64-NEXT: cmovbl %eax, %esi
+; X64-NEXT: cmoval %esi, %eax
+; X64-NEXT: cmoval %edi, %esi
+; X64-NEXT: subl %eax, %esi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: imull %ecx, %eax
@@ -720,21 +744,24 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; X86-LABEL: scalar_i16_unsigned_reg_reg:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: setbe %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: ja .LBB11_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB11_3
+; X86-NEXT: .LBB11_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: subw %dx, %ax
-; X86-NEXT: ja .LBB11_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: negl %eax
-; X86-NEXT: .LBB11_2:
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpw %cx, %dx
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl $1, %esi
+; X86-NEXT: .LBB11_3:
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
-; X86-NEXT: imull %esi, %eax
+; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: popl %esi
@@ -755,16 +782,15 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_signed_mem_reg:
; X64: # %bb.0:
-; X64-NEXT: movswl (%rdi), %ecx
+; X64-NEXT: movzwl (%rdi), %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpw %si, %cx
; X64-NEXT: setle %al
; X64-NEXT: leal -1(%rax,%rax), %edx
; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: movswl %si, %esi
-; X64-NEXT: subl %ecx, %esi
-; X64-NEXT: cmovll %eax, %esi
+; X64-NEXT: cmovgl %esi, %eax
+; X64-NEXT: cmovgl %ecx, %esi
+; X64-NEXT: subl %eax, %esi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: imull %edx, %eax
@@ -774,26 +800,29 @@ define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
;
; X86-LABEL: scalar_i16_signed_mem_reg:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzwl (%ecx), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: setle %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: jg .LBB12_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB12_3
+; X86-NEXT: .LBB12_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: subw %dx, %ax
-; X86-NEXT: jg .LBB12_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: negl %eax
-; X86-NEXT: .LBB12_2:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpw %dx, %cx
-; X86-NEXT: setle %bl
-; X86-NEXT: leal -1(%ebx,%ebx), %edx
+; X86-NEXT: .LBB12_3:
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %esi
; X86-NEXT: retl
%a1 = load i16, ptr %a1_addr
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -810,16 +839,15 @@ define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i16_signed_reg_mem:
; X64: # %bb.0:
-; X64-NEXT: movswl (%rsi), %eax
+; X64-NEXT: movzwl (%rsi), %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpw %ax, %di
; X64-NEXT: setle %cl
; X64-NEXT: leal -1(%rcx,%rcx), %ecx
; X64-NEXT: movl %edi, %edx
-; X64-NEXT: subl %eax, %edx
-; X64-NEXT: movswl %di, %esi
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: cmovll %edx, %eax
+; X64-NEXT: cmovgl %eax, %edx
+; X64-NEXT: cmovgl %edi, %eax
+; X64-NEXT: subl %edx, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: imull %ecx, %eax
@@ -829,26 +857,29 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
;
; X86-LABEL: scalar_i16_signed_reg_mem:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %edx
+; X86-NEXT: movzwl (%eax), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: setle %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: jg .LBB13_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB13_3
+; X86-NEXT: .LBB13_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: subw %dx, %ax
-; X86-NEXT: jg .LBB13_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: negl %eax
-; X86-NEXT: .LBB13_2:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpw %dx, %cx
-; X86-NEXT: setle %bl
-; X86-NEXT: leal -1(%ebx,%ebx), %edx
+; X86-NEXT: .LBB13_3:
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %esi
; X86-NEXT: retl
%a2 = load i16, ptr %a2_addr
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -865,16 +896,16 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i16_signed_mem_mem:
; X64: # %bb.0:
-; X64-NEXT: movswl (%rdi), %ecx
-; X64-NEXT: movswl (%rsi), %eax
+; X64-NEXT: movzwl (%rdi), %ecx
+; X64-NEXT: movzwl (%rsi), %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpw %ax, %cx
; X64-NEXT: setle %dl
; X64-NEXT: leal -1(%rdx,%rdx), %edx
; X64-NEXT: movl %ecx, %esi
-; X64-NEXT: subl %eax, %esi
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: cmovll %esi, %eax
+; X64-NEXT: cmovgl %eax, %esi
+; X64-NEXT: cmovgl %ecx, %eax
+; X64-NEXT: subl %esi, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: shrl %eax
; X64-NEXT: imull %edx, %eax
@@ -884,27 +915,30 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
;
; X86-LABEL: scalar_i16_signed_mem_mem:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %edx
+; X86-NEXT: movzwl (%eax), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: setle %dl
+; X86-NEXT: leal -1(%edx,%edx), %edx
+; X86-NEXT: jg .LBB14_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: jmp .LBB14_3
+; X86-NEXT: .LBB14_1:
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: subw %dx, %ax
-; X86-NEXT: jg .LBB14_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: negl %eax
-; X86-NEXT: .LBB14_2:
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpw %dx, %cx
-; X86-NEXT: setle %bl
-; X86-NEXT: leal -1(%ebx,%ebx), %edx
+; X86-NEXT: .LBB14_3:
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %ebx
+; X86-NEXT: popl %esi
; X86-NEXT: retl
%a1 = load i16, ptr %a1_addr
%a2 = load i16, ptr %a2_addr
@@ -928,36 +962,38 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_reg_reg:
; X64: # %bb.0:
-; X64-NEXT: cmpb %sil, %dil
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: cmpb %al, %dil
; X64-NEXT: setg %cl
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: cmovgl %esi, %edx
+; X64-NEXT: cmovgl %edi, %eax
; X64-NEXT: negb %cl
; X64-NEXT: orb $1, %cl
-; X64-NEXT: movsbl %dil, %edx
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: movsbl %sil, %eax
-; X64-NEXT: subl %edx, %eax
-; X64-NEXT: cmovll %edi, %eax
+; X64-NEXT: subb %dl, %al
; X64-NEXT: shrb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %cl
-; X64-NEXT: addb %dl, %al
+; X64-NEXT: addb %dil, %al
; X64-NEXT: retq
;
; X86-LABEL: scalar_i8_signed_reg_reg:
; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
-; X86-NEXT: cmpb %ah, %cl
+; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setg %dl
-; X86-NEXT: negb %dl
-; X86-NEXT: orb $1, %dl
+; X86-NEXT: jg .LBB15_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movb %cl, %ah
+; X86-NEXT: jmp .LBB15_3
+; X86-NEXT: .LBB15_1:
+; X86-NEXT: movb %al, %ah
; X86-NEXT: movb %cl, %al
+; X86-NEXT: .LBB15_3:
; X86-NEXT: subb %ah, %al
-; X86-NEXT: jg .LBB15_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: subb %cl, %ah
-; X86-NEXT: movb %ah, %al
-; X86-NEXT: .LBB15_2:
+; X86-NEXT: negb %dl
+; X86-NEXT: orb $1, %dl
; X86-NEXT: shrb %al
; X86-NEXT: mulb %dl
; X86-NEXT: addb %cl, %al
@@ -976,36 +1012,38 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_unsigned_reg_reg:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: cmpb %dil, %sil
-; X64-NEXT: sbbl %ecx, %ecx
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: cmpb %al, %dil
+; X64-NEXT: seta %cl
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: cmoval %esi, %edx
+; X64-NEXT: cmoval %edi, %eax
+; X64-NEXT: negb %cl
; X64-NEXT: orb $1, %cl
-; X64-NEXT: movzbl %dil, %edx
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: subl %edx, %eax
-; X64-NEXT: cmovbl %edi, %eax
+; X64-NEXT: subb %dl, %al
; X64-NEXT: shrb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %cl
-; X64-NEXT: addb %dl, %al
+; X64-NEXT: addb %dil, %al
; X64-NEXT: retq
;
; X86-LABEL: scalar_i8_unsigned_reg_reg:
; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movb %ch, %ah
-; X86-NEXT: subb %cl, %ah
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orb $1, %dl
+; X86-NEXT: cmpb %al, %cl
+; X86-NEXT: seta %dl
+; X86-NEXT: ja .LBB16_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movb %cl, %ah
+; X86-NEXT: jmp .LBB16_3
+; X86-NEXT: .LBB16_1:
+; X86-NEXT: movb %al, %ah
; X86-NEXT: movb %cl, %al
-; X86-NEXT: subb %ch, %al
-; X86-NEXT: ja .LBB16_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %ah, %al
-; X86-NEXT: .LBB16_2:
+; X86-NEXT: .LBB16_3:
+; X86-NEXT: subb %ah, %al
+; X86-NEXT: negb %dl
+; X86-NEXT: orb $1, %dl
; X86-NEXT: shrb %al
; X86-NEXT: mulb %dl
; X86-NEXT: addb %cl, %al
@@ -1026,16 +1064,16 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_mem_reg:
; X64: # %bb.0:
-; X64-NEXT: movsbl (%rdi), %ecx
+; X64-NEXT: movzbl (%rdi), %ecx
; X64-NEXT: cmpb %sil, %cl
; X64-NEXT: setg %dl
+; X64-NEXT: movl %ecx, %edi
+; X64-NEXT: cmovgl %esi, %edi
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: cmovlel %esi, %eax
; X64-NEXT: negb %dl
; X64-NEXT: orb $1, %dl
-; X64-NEXT: movl %ecx, %edi
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: movsbl %sil, %eax
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: cmovll %edi, %eax
+; X64-NEXT: subb %dil, %al
; X64-NEXT: shrb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %dl
@@ -1044,20 +1082,22 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
;
; X86-LABEL: scalar_i8_signed_mem_reg:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl (%ecx), %ecx
-; X86-NEXT: cmpb %ah, %cl
+; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setg %dl
-; X86-NEXT: negb %dl
-; X86-NEXT: orb $1, %dl
+; X86-NEXT: jg .LBB17_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movb %cl, %ah
+; X86-NEXT: jmp .LBB17_3
+; X86-NEXT: .LBB17_1:
+; X86-NEXT: movb %al, %ah
; X86-NEXT: movb %cl, %al
+; X86-NEXT: .LBB17_3:
; X86-NEXT: subb %ah, %al
-; X86-NEXT: jg .LBB17_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: subb %cl, %ah
-; X86-NEXT: movb %ah, %al
-; X86-NEXT: .LBB17_2:
+; X86-NEXT: negb %dl
+; X86-NEXT: orb $1, %dl
; X86-NEXT: shrb %al
; X86-NEXT: mulb %dl
; X86-NEXT: addb %cl, %al
@@ -1077,37 +1117,39 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_reg_mem:
; X64: # %bb.0:
-; X64-NEXT: movsbl (%rsi), %eax
+; X64-NEXT: movzbl (%rsi), %eax
; X64-NEXT: cmpb %al, %dil
; X64-NEXT: setg %cl
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: cmovgl %eax, %edx
+; X64-NEXT: cmovgl %edi, %eax
; X64-NEXT: negb %cl
; X64-NEXT: orb $1, %cl
-; X64-NEXT: movsbl %dil, %edx
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: subl %edx, %eax
-; X64-NEXT: cmovll %edi, %eax
+; X64-NEXT: subb %dl, %al
; X64-NEXT: shrb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %cl
-; X64-NEXT: addb %dl, %al
+; X64-NEXT: addb %dil, %al
; X64-NEXT: retq
;
; X86-LABEL: scalar_i8_signed_reg_mem:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb (%eax), %ah
-; X86-NEXT: cmpb %ah, %cl
+; X86-NEXT: movzbl (%eax), %eax
+; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setg %dl
-; X86-NEXT: negb %dl
-; X86-NEXT: orb $1, %dl
+; X86-NEXT: jg .LBB18_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movb %cl, %ah
+; X86-NEXT: jmp .LBB18_3
+; X86-NEXT: .LBB18_1:
+; X86-NEXT: movb %al, %ah
; X86-NEXT: movb %cl, %al
+; X86-NEXT: .LBB18_3:
; X86-NEXT: subb %ah, %al
-; X86-NEXT: jg .LBB18_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: subb %cl, %ah
-; X86-NEXT: movb %ah, %al
-; X86-NEXT: .LBB18_2:
+; X86-NEXT: negb %dl
+; X86-NEXT: orb $1, %dl
; X86-NEXT: shrb %al
; X86-NEXT: mulb %dl
; X86-NEXT: addb %cl, %al
@@ -1127,16 +1169,16 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_mem_mem:
; X64: # %bb.0:
-; X64-NEXT: movsbl (%rdi), %ecx
-; X64-NEXT: movsbl (%rsi), %eax
+; X64-NEXT: movzbl (%rdi), %ecx
+; X64-NEXT: movzbl (%rsi), %eax
; X64-NEXT: cmpb %al, %cl
; X64-NEXT: setg %dl
+; X64-NEXT: movl %ecx, %esi
+; X64-NEXT: cmovgl %eax, %esi
+; X64-NEXT: cmovgl %ecx, %eax
; X64-NEXT: negb %dl
; X64-NEXT: orb $1, %dl
-; X64-NEXT: movl %ecx, %esi
-; X64-NEXT: subl %eax, %esi
-; X64-NEXT: subl %ecx, %eax
-; X64-NEXT: cmovll %esi, %eax
+; X64-NEXT: subb %sil, %al
; X64-NEXT: shrb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %dl
@@ -1148,18 +1190,20 @@ define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl (%ecx), %ecx
-; X86-NEXT: movb (%eax), %ah
-; X86-NEXT: cmpb %ah, %cl
+; X86-NEXT: movzbl (%eax), %eax
+; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setg %dl
-; X86-NEXT: negb %dl
-; X86-NEXT: orb $1, %dl
+; X86-NEXT: jg .LBB19_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movb %cl, %ah
+; X86-NEXT: jmp .LBB19_3
+; X86-NEXT: .LBB19_1:
+; X86-NEXT: movb %al, %ah
; X86-NEXT: movb %cl, %al
+; X86-NEXT: .LBB19_3:
; X86-NEXT: subb %ah, %al
-; X86-NEXT: jg .LBB19_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: subb %cl, %ah
-; X86-NEXT: movb %ah, %al
-; X86-NEXT: .LBB19_2:
+; X86-NEXT: negb %dl
+; X86-NEXT: orb $1, %dl
; X86-NEXT: shrb %al
; X86-NEXT: mulb %dl
; X86-NEXT: addb %cl, %al
diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
index 7400b6c1984f72..e404dd08b5f1e2 100644
--- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
+++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
@@ -201,23 +201,6 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) {
SDValue UMax = DAG->getNode(ISD::UMAX, DL, Int32VT, Op0, Op1);
SDValue UMin = DAG->getNode(ISD::UMIN, DL, Int32VT, Op1, Op0);
- SDValue ICMP_GT = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETGT);
- SDValue ICMP_GE = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETGE);
- SDValue ICMP_UGT = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETUGT);
- SDValue ICMP_UGE = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETUGE);
- SDValue ICMP_LT = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETLT);
- SDValue ICMP_LE = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETLE);
- SDValue ICMP_ULT = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETULT);
- SDValue ICMP_ULE = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETULE);
- SDValue SMaxLikeGT = DAG->getSelect(DL, MVT::i32, ICMP_GT, Op0, Op1);
- SDValue SMaxLikeGE = DAG->getSelect(DL, MVT::i32, ICMP_GE, Op0, Op1);
- SDValue UMaxLikeUGT = DAG->getSelect(DL, MVT::i32, ICMP_UGT, Op0, Op1);
- SDValue UMaxLikeUGE = DAG->getSelect(DL, MVT::i32, ICMP_UGE, Op0, Op1);
- SDValue SMinLikeLT = DAG->getSelect(DL, MVT::i32, ICMP_LT, Op0, Op1);
- SDValue SMinLikeLE = DAG->getSelect(DL, MVT::i32, ICMP_LE, Op0, Op1);
- SDValue UMinLikeULT = DAG->getSelect(DL, MVT::i32, ICMP_ULT, Op0, Op1);
- SDValue UMinLikeULE = DAG->getSelect(DL, MVT::i32, ICMP_ULE, Op0, Op1);
-
SDValue SFAdd = DAG->getNode(ISD::STRICT_FADD, DL, {Float32VT, MVT::Other},
{DAG->getEntryNode(), Op2, Op2});
@@ -248,24 +231,12 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) {
EXPECT_TRUE(sd_match(SMax, m_c_BinOp(ISD::SMAX, m_Value(), m_Value())));
EXPECT_TRUE(sd_match(SMax, m_SMax(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(SMax, m_SMaxLike(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(SMaxLikeGT, m_SMaxLike(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(SMaxLikeGE, m_SMaxLike(m_Value(), m_Value())));
EXPECT_TRUE(sd_match(SMin, m_c_BinOp(ISD::SMIN, m_Value(), m_Value())));
EXPECT_TRUE(sd_match(SMin, m_SMin(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(SMin, m_SMinLike(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(SMinLikeLT, m_SMinLike(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(SMinLikeLE, m_SMinLike(m_Value(), m_Value())));
EXPECT_TRUE(sd_match(UMax, m_c_BinOp(ISD::UMAX, m_Value(), m_Value())));
EXPECT_TRUE(sd_match(UMax, m_UMax(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(UMax, m_UMaxLike(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(UMaxLikeUGT, m_UMaxLike(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(UMaxLikeUGE, m_UMaxLike(m_Value(), m_Value())));
EXPECT_TRUE(sd_match(UMin, m_c_BinOp(ISD::UMIN, m_Value(), m_Value())));
EXPECT_TRUE(sd_match(UMin, m_UMin(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(UMin, m_UMinLike(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(UMinLikeULT, m_UMinLike(m_Value(), m_Value())));
- EXPECT_TRUE(sd_match(UMinLikeULE, m_UMinLike(m_Value(), m_Value())));
SDValue BindVal;
EXPECT_TRUE(sd_match(SFAdd, m_ChainedBinOp(ISD::STRICT_FADD, m_Value(BindVal),
More information about the llvm-commits
mailing list