[llvm] [DAG] Enhance SDPatternMatch to match integer minimum and maximum patterns in addition to the existing ISD nodes. (PR #111774)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 9 19:38:08 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (c8ef)
<details>
<summary>Changes</summary>
Closes #108218.
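This patch extends the integer min/max matchers in SDPatternMatch (m_SMin, m_SMax, m_UMin, m_UMax) so that, in addition to the dedicated ISD::SMIN/SMAX/UMIN/UMAX nodes, they also match the equivalent compare+select form (an ISD::SETCC feeding an ISD::SELECT), similar to the icmp+select handling in IR PatternMatch.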
---
Patch is 105.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111774.diff
9 Files Affected:
- (modified) llvm/include/llvm/CodeGen/SDPatternMatch.h (+86-8)
- (modified) llvm/test/CodeGen/AArch64/abds.ll (+19-24)
- (modified) llvm/test/CodeGen/AArch64/abdu.ll (+20-24)
- (modified) llvm/test/CodeGen/AArch64/midpoint-int.ll (+74-92)
- (modified) llvm/test/CodeGen/RISCV/abds.ll (+123-136)
- (modified) llvm/test/CodeGen/RISCV/abdu.ll (+129-126)
- (modified) llvm/test/CodeGen/X86/abds.ll (+71-93)
- (modified) llvm/test/CodeGen/X86/abdu.ll (+68-104)
- (modified) llvm/test/CodeGen/X86/midpoint-int.ll (+447-491)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 04135ee7e1c022..b629dd50aced00 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -542,6 +542,80 @@ struct BinaryOpc_match {
}
};
+template <typename LHS_P, typename RHS_P, typename Pred_t,
+ bool Commutable = false, bool ExcludeChain = false>
+struct MaxMin_match {
+ using PredType = Pred_t;
+ LHS_P LHS;
+ RHS_P RHS;
+
+ MaxMin_match(const LHS_P &L, const RHS_P &R) : LHS(L), RHS(R) {}
+
+ template <typename MatchContext>
+ bool match(const MatchContext &Ctx, SDValue N) {
+ if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT))) {
+ EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
+ assert(EO_SELECT.Size == 3);
+ SDValue Cond = N->getOperand(EO_SELECT.FirstIndex);
+ SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 1);
+ SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 2);
+
+ if (sd_context_match(Cond, Ctx, m_Opc(ISD::SETCC))) {
+ EffectiveOperands<ExcludeChain> EO_SETCC(Cond, Ctx);
+ assert(EO_SETCC.Size == 3);
+ SDValue L = Cond->getOperand(EO_SETCC.FirstIndex);
+ SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
+ CondCodeSDNode *CondNode =
+ cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
+
+ if ((TrueValue != L || FalseValue != R) &&
+ (TrueValue != R || FalseValue != L)) {
+ return false;
+ }
+
+ ISD::CondCode Cond =
+ TrueValue == L ? CondNode->get()
+ : getSetCCInverse(CondNode->get(), L.getValueType());
+ if (!Pred_t::match(Cond)) {
+ return false;
+ }
+ return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
+ (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ }
+ }
+
+ return false;
+ }
+};
+
+// Helper class for identifying signed max predicates.
+struct smax_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETGT || Cond == ISD::CondCode::SETGE;
+ }
+};
+
+// Helper class for identifying unsigned max predicates.
+struct umax_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETUGT || Cond == ISD::CondCode::SETUGE;
+ }
+};
+
+// Helper class for identifying signed min predicates.
+struct smin_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETLT || Cond == ISD::CondCode::SETLE;
+ }
+};
+
+// Helper class for identifying unsigned min predicates.
+struct umin_pred_ty {
+ static bool match(ISD::CondCode Cond) {
+ return Cond == ISD::CondCode::SETULT || Cond == ISD::CondCode::SETULE;
+ }
+};
+
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS> m_BinOp(unsigned Opc, const LHS &L,
const RHS &R) {
@@ -609,23 +683,27 @@ inline BinaryOpc_match<LHS, RHS, true> m_Xor(const LHS &L, const RHS &R) {
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_SMin(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R);
+inline auto m_SMin(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::SMIN, L, R),
+ MaxMin_match<LHS, RHS, smin_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_SMax(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R);
+inline auto m_SMax(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::SMAX, L, R),
+ MaxMin_match<LHS, RHS, smax_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_UMin(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R);
+inline auto m_UMin(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::UMIN, L, R),
+ MaxMin_match<LHS, RHS, umin_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
-inline BinaryOpc_match<LHS, RHS, true> m_UMax(const LHS &L, const RHS &R) {
- return BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R);
+inline auto m_UMax(const LHS &L, const RHS &R) {
+ return m_AnyOf(BinaryOpc_match<LHS, RHS, true>(ISD::UMAX, L, R),
+ MaxMin_match<LHS, RHS, umax_pred_ty, true>(L, R));
}
template <typename LHS, typename RHS>
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index e5cc04f9be1a1f..62db30f17747cf 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -547,10 +547,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, w1, sxtb
-; CHECK-NEXT: csel w8, w0, w1, lt
-; CHECK-NEXT: csel w9, w1, w0, lt
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, sxtb
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i8 %a, %b
%ab = select i1 %cmp, i8 %a, i8 %b
@@ -563,10 +562,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, w1, sxth
-; CHECK-NEXT: csel w8, w0, w1, le
-; CHECK-NEXT: csel w9, w1, w0, le
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sle i16 %a, %b
%ab = select i1 %cmp, i16 %a, i16 %b
@@ -578,10 +576,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, gt
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: sub w8, w1, w0
+; CHECK-NEXT: subs w9, w0, w1
+; CHECK-NEXT: csel w0, w9, w8, gt
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -593,10 +590,9 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, ge
-; CHECK-NEXT: csel x9, x1, x0, ge
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: sub x8, x1, x0
+; CHECK-NEXT: subs x9, x0, x1
+; CHECK-NEXT: csel x0, x9, x8, gt
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -608,14 +604,13 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: sbcs xzr, x1, x3
-; CHECK-NEXT: csel x8, x0, x2, lt
-; CHECK-NEXT: csel x9, x2, x0, lt
-; CHECK-NEXT: csel x10, x1, x3, lt
-; CHECK-NEXT: csel x11, x3, x1, lt
-; CHECK-NEXT: subs x0, x9, x8
-; CHECK-NEXT: sbc x1, x11, x10
+; CHECK-NEXT: subs x8, x0, x2
+; CHECK-NEXT: sbc x9, x1, x3
+; CHECK-NEXT: subs x10, x2, x0
+; CHECK-NEXT: sbc x11, x3, x1
+; CHECK-NEXT: sbcs xzr, x3, x1
+; CHECK-NEXT: csel x0, x8, x10, lt
+; CHECK-NEXT: csel x1, x9, x11, lt
; CHECK-NEXT: ret
%cmp = icmp slt i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 0a44ae16884582..4585de96c848f2 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -408,10 +408,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_select_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, w1, uxtb
-; CHECK-NEXT: csel w8, w0, w1, lo
-; CHECK-NEXT: csel w9, w1, w0, lo
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, uxtb
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ult i8 %a, %b
%ab = select i1 %cmp, i8 %a, i8 %b
@@ -424,10 +423,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_select_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: cmp w8, w1, uxth
-; CHECK-NEXT: csel w8, w0, w1, ls
-; CHECK-NEXT: csel w9, w1, w0, ls
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp ule i16 %a, %b
%ab = select i1 %cmp, i16 %a, i16 %b
@@ -439,10 +437,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, hi
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: sub w8, w1, w0
+; CHECK-NEXT: subs w9, w0, w1
+; CHECK-NEXT: csel w0, w9, w8, hi
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -454,10 +451,9 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, hs
-; CHECK-NEXT: csel x9, x1, x0, hs
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: sub x8, x1, x0
+; CHECK-NEXT: subs x9, x0, x1
+; CHECK-NEXT: csel x0, x9, x8, hi
; CHECK-NEXT: ret
%cmp = icmp uge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
@@ -469,14 +465,14 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; CHECK-LABEL: abd_select_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x2
-; CHECK-NEXT: sbcs xzr, x1, x3
-; CHECK-NEXT: csel x8, x0, x2, lo
-; CHECK-NEXT: csel x9, x2, x0, lo
-; CHECK-NEXT: csel x10, x1, x3, lo
-; CHECK-NEXT: csel x11, x3, x1, lo
-; CHECK-NEXT: subs x0, x9, x8
-; CHECK-NEXT: sbc x1, x11, x10
+; CHECK-NEXT: subs x8, x0, x2
+; CHECK-NEXT: sbcs x9, x1, x3
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: sbfx x10, x10, #0, #1
+; CHECK-NEXT: eor x8, x8, x10
+; CHECK-NEXT: eor x9, x9, x10
+; CHECK-NEXT: subs x0, x8, x10
+; CHECK-NEXT: sbc x1, x9, x10
; CHECK-NEXT: ret
%cmp = icmp ult i128 %a, %b
%ab = select i1 %cmp, i128 %a, i128 %b
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
index 1043fa5c4565ee..bbdce7c6e933b3 100644
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -13,12 +13,11 @@
define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: sub w9, w1, w0
+; CHECK-NEXT: subs w10, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: csel w9, w10, w9, gt
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -36,12 +35,11 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: scalar_i32_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: sub w9, w1, w0
+; CHECK-NEXT: subs w10, w0, w1
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: csel w9, w10, w9, hi
; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: sub w9, w10, w9
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -64,10 +62,10 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; CHECK-NEXT: ldr w9, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w9, w1
-; CHECK-NEXT: csel w10, w1, w9, gt
-; CHECK-NEXT: csel w11, w9, w1, gt
+; CHECK-NEXT: sub w10, w1, w9
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w11, w10
+; CHECK-NEXT: subs w11, w9, w1
+; CHECK-NEXT: csel w10, w11, w10, gt
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -89,10 +87,10 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w9, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: csel w10, w9, w0, gt
-; CHECK-NEXT: csel w9, w0, w9, gt
+; CHECK-NEXT: sub w10, w9, w0
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w9, w10
+; CHECK-NEXT: subs w9, w0, w9
+; CHECK-NEXT: csel w9, w9, w10, gt
; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
@@ -115,10 +113,10 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w11, w10, w9, gt
-; CHECK-NEXT: csel w10, w9, w10, gt
+; CHECK-NEXT: sub w11, w10, w9
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w10, w11
+; CHECK-NEXT: subs w10, w9, w10
+; CHECK-NEXT: csel w10, w10, w11, gt
; CHECK-NEXT: lsr w10, w10, #1
; CHECK-NEXT: madd w0, w10, w8, w9
; CHECK-NEXT: ret
@@ -144,12 +142,11 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: sub x9, x1, x0
+; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x1, x0, gt
-; CHECK-NEXT: csel x10, x0, x1, gt
+; CHECK-NEXT: csel x9, x10, x9, gt
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -167,12 +164,11 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; CHECK-LABEL: scalar_i64_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: sub x9, x1, x0
+; CHECK-NEXT: subs x10, x0, x1
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: csel x9, x1, x0, hi
-; CHECK-NEXT: csel x10, x0, x1, hi
+; CHECK-NEXT: csel x9, x10, x9, hi
; CHECK-NEXT: cneg x8, x8, ls
-; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -195,10 +191,10 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; CHECK-NEXT: ldr x9, [x0]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x9, x1
-; CHECK-NEXT: csel x10, x1, x9, gt
-; CHECK-NEXT: csel x11, x9, x1, gt
+; CHECK-NEXT: sub x10, x1, x9
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x10, x11, x10
+; CHECK-NEXT: subs x11, x9, x1
+; CHECK-NEXT: csel x10, x11, x10, gt
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -220,10 +216,10 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x9, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x0, x9
-; CHECK-NEXT: csel x10, x9, x0, gt
-; CHECK-NEXT: csel x9, x0, x9, gt
+; CHECK-NEXT: sub x10, x9, x0
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x9, x9, x10
+; CHECK-NEXT: subs x9, x0, x9
+; CHECK-NEXT: csel x9, x9, x10, gt
; CHECK-NEXT: lsr x9, x9, #1
; CHECK-NEXT: madd x0, x9, x8, x0
; CHECK-NEXT: ret
@@ -246,10 +242,10 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldr x10, [x1]
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: cmp x9, x10
-; CHECK-NEXT: csel x11, x10, x9, gt
-; CHECK-NEXT: csel x10, x9, x10, gt
+; CHECK-NEXT: sub x11, x10, x9
; CHECK-NEXT: cneg x8, x8, le
-; CHECK-NEXT: sub x10, x10, x11
+; CHECK-NEXT: subs x10, x9, x10
+; CHECK-NEXT: csel x10, x10, x11, gt
; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: madd x0, x10, x8, x9
; CHECK-NEXT: ret
@@ -275,14 +271,13 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: sxth w9, w1
+; CHECK-NEXT: sxth w10, w0
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, sxth
-; CHECK-NEXT: csel w9, w1, w0, gt
-; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -299,14 +294,13 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: and w9, w1, #0xffff
+; CHECK-NEXT: and w10, w0, #0xffff
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, uxth
-; CHECK-NEXT: csel w9, w1, w0, hi
-; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, ls
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%t3 = icmp ugt i16 %a1, %a2
@@ -325,15 +319,14 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; CHECK-LABEL: scalar_i16_signed_mem_reg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsh w9, [x0]
+; CHECK-NEXT: sxth w9, w1
+; CHECK-NEXT: ldrsh w10, [x0]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w1, sxth
-; CHECK-NEXT: csel w10, w1, w9, gt
-; CHECK-NEXT: csel w11, w9, w1, gt
+; CHECK-NEXT: subs w9, w10, w9
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w11, w10
-; CHECK-NEXT: ubfx w10, w10, #1, #15
-; CHECK-NEXT: madd w0, w10, w8, w9
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w10
; CHECK-NEXT: ret
%a1 = load i16, ptr %a1_addr
%t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -353,12 +346,10 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; CHECK-NEXT: sxth w9, w0
; CHECK-NEXT: ldrsh w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w9, w10, w0, gt
-; CHECK-NEXT: csel w10, w0, w10, gt
+; CHECK-NEXT: subs w9, w9, w10
+; CHECK-NEXT: cneg w9, w9, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w9, w10, w9
-; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: lsr w9, w9, #1
; CHECK-NEXT: madd w0, w9, w8, w0
; CHECK-NEXT: ret
%a2 = load i16, ptr %a2_addr
@@ -379,12 +370,10 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; CHECK-NEXT: ldrsh w9, [x0]
; CHECK-NEXT: ldrsh w10, [x1]
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: csel w11, w10, w9, gt
-; CHECK-NEXT: csel w10, w9, w10, gt
+; CHECK-NEXT: subs w10, w9, w10
+; CHECK-NEXT: cneg w10, w10, mi
; CHECK-NEXT: cneg w8, w8, le
-; CHECK-NEXT: sub w10, w10, w11
-; CHECK-NEXT: ubfx w10, w10, #1, #15
+; CHE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/111774
More information about the llvm-commits mailing list