[llvm] (Draft) [DAG] Combine icmps into subo_carry (PR #106291)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 11:50:09 PDT 2024
https://github.com/v01dXYZ updated https://github.com/llvm/llvm-project/pull/106291
>From 17a883e2afd6e92db5feee86cad1caf123ca86e0 Mon Sep 17 00:00:00 2001
From: v01dxyz <v01dxyz at v01d.xyz>
Date: Wed, 28 Aug 2024 14:02:51 +0200
Subject: [PATCH 1/2] test pre-commit: Combine ICmps into Sub Overflow Carry if
possible
---
.../CodeGen/AArch64/subo-carry-from-icmps.ll | 128 +++++++++
.../test/CodeGen/X86/subo-carry-from-icmps.ll | 263 ++++++++++++++++++
2 files changed, 391 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/subo-carry-from-icmps.ll
create mode 100644 llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
diff --git a/llvm/test/CodeGen/AArch64/subo-carry-from-icmps.ll b/llvm/test/CodeGen/AArch64/subo-carry-from-icmps.ll
new file mode 100644
index 00000000000000..3151ad55a39c63
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/subo-carry-from-icmps.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+define i1 @is_usub_overflow_i32_with_two_i16(i16 %a0, i16 %a1, i16 %b0, i16 %b1) {
+; CHECK-LABEL: is_usub_overflow_i32_with_two_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: and w8, w2, #0xffff
+; CHECK-NEXT: cmp w9, w1, uxth
+; CHECK-NEXT: cset w9, lo
+; CHECK-NEXT: cmp w8, w3, uxth
+; CHECK-NEXT: csel w8, wzr, w9, ne
+; CHECK-NEXT: csinc w0, w8, wzr, hs
+; CHECK-NEXT: ret
+entry:
+ %carry.A = icmp ult i16 %a0, %a1
+ %carry.B = icmp ult i16 %b0, %b1
+ %equal.B = icmp eq i16 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_usub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1) {
+; CHECK-LABEL: is_usub_overflow_i64_with_two_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: cset w8, lo
+; CHECK-NEXT: cmp w2, w3
+; CHECK-NEXT: csel w8, wzr, w8, ne
+; CHECK-NEXT: csinc w0, w8, wzr, hs
+; CHECK-NEXT: ret
+entry:
+ %carry.A = icmp ult i32 %a0, %a1
+ %carry.B = icmp ult i32 %b0, %b1
+ %equal.B = icmp eq i32 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_usub_overflow_i128_with_two_i64(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+;
+; CHECK-LABEL: is_usub_overflow_i128_with_two_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: cset w8, lo
+; CHECK-NEXT: cmp x2, x3
+; CHECK-NEXT: csel w8, wzr, w8, ne
+; CHECK-NEXT: csinc w0, w8, wzr, hs
+; CHECK-NEXT: ret
+entry:
+ %carry.A = icmp ult i64 %a0, %a1
+ %carry.B = icmp ult i64 %b0, %b1
+ %equal.B = icmp eq i64 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+; check GE/UGE are supported too
+define i1 @i128_with_ge(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+;
+; CHECK-LABEL: i128_with_ge:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmp x1, x0
+; CHECK-NEXT: ccmp x2, x3, #0, hs
+; CHECK-NEXT: ccmp x3, x2, #2, ne
+; CHECK-NEXT: cset w0, hs
+; CHECK-NEXT: ret
+entry:
+ %carry.A = icmp uge i64 %a1, %a0
+ %carry.B = icmp uge i64 %b1, %b0
+ %equal.B = icmp eq i64 %b0, %b1
+
+ ; propagate carry only if B0 == B1
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+; do not combine if the check for CarryA propagating is not EQ
+define i1 @nothing_if_not_eq(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; CHECK-LABEL: nothing_if_not_eq:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmp x1, x0
+; CHECK-NEXT: ccmp x2, x3, #2, hs
+; CHECK-NEXT: ccmp x3, x2, #2, hi
+; CHECK-NEXT: cset w0, hs
+; CHECK-NEXT: ret
+entry:
+ %carry.A = icmp uge i64 %a1, %a0
+ %carry.B = icmp uge i64 %b1, %b0
+ %equal.B = icmp ule i64 %b0, %b1
+
+ ; propagate carry only if B0 == B1
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_ssub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1) {
+; CHECK-LABEL: is_ssub_overflow_i64_with_two_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmp w0, w1
+; CHECK-NEXT: cset w8, lt
+; CHECK-NEXT: cmp w2, w3
+; CHECK-NEXT: csel w8, wzr, w8, ne
+; CHECK-NEXT: csinc w0, w8, wzr, ge
+; CHECK-NEXT: ret
+entry:
+ %carry.A = icmp slt i32 %a0, %a1
+ %carry.B = icmp slt i32 %b0, %b1
+ %equal.B = icmp eq i32 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
diff --git a/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll b/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
new file mode 100644
index 00000000000000..f421e3b2e03179
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
@@ -0,0 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+
+define i1 @is_usub_overflow_i32_with_two_i16(i16 %a0, i16 %a1, i16 %b0, i16 %b1) {
+; X86-LABEL: is_usub_overflow_i32_with_two_i16:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpw {{[0-9]+}}(%esp), %cx
+; X86-NEXT: setb %cl
+; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
+; X86-NEXT: setb %dl
+; X86-NEXT: sete %al
+; X86-NEXT: andb %cl, %al
+; X86-NEXT: orb %dl, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_usub_overflow_i32_with_two_i16:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpw %si, %di
+; X64-NEXT: setb %sil
+; X64-NEXT: cmpw %cx, %dx
+; X64-NEXT: setb %cl
+; X64-NEXT: sete %al
+; X64-NEXT: andb %sil, %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp ult i16 %a0, %a1
+ %carry.B = icmp ult i16 %b0, %b1
+ %equal.B = icmp eq i16 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_usub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1) {
+; X86-LABEL: is_usub_overflow_i64_with_two_i32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setb %cl
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %dl
+; X86-NEXT: sete %al
+; X86-NEXT: andb %cl, %al
+; X86-NEXT: orb %dl, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_usub_overflow_i64_with_two_i32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setb %sil
+; X64-NEXT: cmpl %ecx, %edx
+; X64-NEXT: setb %cl
+; X64-NEXT: sete %al
+; X64-NEXT: andb %sil, %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp ult i32 %a0, %a1
+ %carry.B = icmp ult i32 %b0, %b1
+ %equal.B = icmp eq i32 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_usub_overflow_i128_with_two_i64(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: is_usub_overflow_i128_with_two_i64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebx
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: pushl %edi
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: .cfi_offset %esi, -16
+; X86-NEXT: .cfi_offset %edi, -12
+; X86-NEXT: .cfi_offset %ebx, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: setb %bl
+; X86-NEXT: cmpl %ecx, %eax
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: setb %bh
+; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: andb %bl, %al
+; X86-NEXT: orb %bh, %al
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: popl %edi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: popl %ebx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: is_usub_overflow_i128_with_two_i64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: setb %sil
+; X64-NEXT: cmpq %rcx, %rdx
+; X64-NEXT: setb %cl
+; X64-NEXT: sete %al
+; X64-NEXT: andb %sil, %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp ult i64 %a0, %a1
+ %carry.B = icmp ult i64 %b0, %b1
+ %equal.B = icmp eq i64 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+; check GE/UGE are supported too
+define i1 @i128_with_ge(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: i128_with_ge:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebx
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: pushl %edi
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: .cfi_offset %esi, -16
+; X86-NEXT: .cfi_offset %edi, -12
+; X86-NEXT: .cfi_offset %ebx, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: setae %bl
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setae %bh
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: andb %bl, %al
+; X86-NEXT: orb %bh, %al
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: popl %edi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: popl %ebx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: i128_with_ge:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpq %rdi, %rsi
+; X64-NEXT: setae %sil
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: setae %cl
+; X64-NEXT: sete %al
+; X64-NEXT: andb %sil, %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp uge i64 %a1, %a0
+ %carry.B = icmp uge i64 %b1, %b0
+ %equal.B = icmp eq i64 %b0, %b1
+
+ ; propagate carry only if B0 == B1
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+; do not combine if the check for CarryA propagating is not EQ
+define i1 @nothing_if_not_eq(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: nothing_if_not_eq:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setae %al
+; X86-NEXT: retl
+;
+; X64-LABEL: nothing_if_not_eq:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpq %rdi, %rsi
+; X64-NEXT: setae %sil
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: setae %cl
+; X64-NEXT: setae %al
+; X64-NEXT: andb %sil, %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp uge i64 %a1, %a0
+ %carry.B = icmp uge i64 %b1, %b0
+ %equal.B = icmp ule i64 %b0, %b1
+
+ ; propagate carry only if B0 == B1
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_ssub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1) {
+; X86-LABEL: is_ssub_overflow_i64_with_two_i32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %cl
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setl %dl
+; X86-NEXT: sete %al
+; X86-NEXT: andb %cl, %al
+; X86-NEXT: orb %dl, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_ssub_overflow_i64_with_two_i32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setl %sil
+; X64-NEXT: cmpl %ecx, %edx
+; X64-NEXT: setl %cl
+; X64-NEXT: sete %al
+; X64-NEXT: andb %sil, %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp slt i32 %a0, %a1
+ %carry.B = icmp slt i32 %b0, %b1
+ %equal.B = icmp eq i32 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
>From 10510b52b30219825b251f8e31e020b161a0c5f7 Mon Sep 17 00:00:00 2001
From: v01dxyz <v01dxyz at v01d.xyz>
Date: Tue, 27 Aug 2024 23:16:00 +0200
Subject: [PATCH 2/2] (Draft) [DAG] Combine icmps into subo_carry
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 81 +++++++++
.../CodeGen/AArch64/subo-carry-from-icmps.ll | 52 +++---
llvm/test/CodeGen/X86/subcarry.ll | 9 +-
.../test/CodeGen/X86/subo-carry-from-icmps.ll | 154 +++++-------------
4 files changed, 150 insertions(+), 146 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b0a906743f29ff..82bed9a2c6dda6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7717,6 +7717,84 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
return SDValue();
}
+/* TMP comment
+
+ it should combine for example this pattern into
+
+ (or (icmp ult B0 B1)
+ (and (icmp ult A0 A1)
+ (icmp eq B0 B1)))
+
+ -->
+
+ (usubo_carry B0 B1 (usubo_carry A0 A1 0):1):1
+*/
+static SDValue foldLogicOfICmpsToSubCarry(SelectionDAG &DAG, SDValue N0,
+ SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+
+ SDValue B0;
+ SDValue B1;
+ ISD::CondCode BCondCode;
+ if (!(sd_match(N0,
+ m_SetCC(m_Value(B0), m_Value(B1), m_CondCode(BCondCode))) &&
+ !isTrueWhenEqual(BCondCode)))
+ return SDValue();
+
+ SDValue A0;
+ SDValue A1;
+ SDValue BEq;
+ ISD::CondCode ACondCode;
+
+ if (!(sd_match(N1,
+ m_And(m_SetCC(m_Value(A0), m_Value(A1), m_CondCode(ACondCode)),
+ m_Value(BEq))) &&
+ !isTrueWhenEqual(ACondCode) &&
+ isSignedIntSetCC(BCondCode) == isSignedIntSetCC(ACondCode)))
+ return SDValue();
+
+ auto SwapIfNotLT = [](SDValue &Op0, SDValue &Op1, ISD::CondCode CondCode) {
+ if (CondCode == ISD::CondCode::SETUGT || CondCode == ISD::CondCode::SETGT)
+ std::swap(Op0, Op1);
+ };
+
+ SwapIfNotLT(B0, B1, BCondCode);
+
+ ISD::CondCode LECondCode = isSignedIntSetCC(BCondCode)
+ ? ISD::CondCode::SETLE
+ : ISD::CondCode::SETULE;
+ ISD::CondCode GECondCode = isSignedIntSetCC(BCondCode)
+ ? ISD::CondCode::SETGE
+ : ISD::CondCode::SETUGE;
+
+  // much simpler to use a big m_AnyOf than to try to do it programmatically
+ if (!(sd_match(BEq,
+ m_AnyOf(m_c_SetCC(m_Specific(B0), m_Specific(B1),
+ m_SpecificCondCode(ISD::CondCode::SETEQ)),
+ m_SetCC(m_Specific(B0), m_Specific(B1),
+ m_SpecificCondCode(LECondCode)),
+ m_SetCC(m_Specific(B1), m_Specific(B0),
+ m_SpecificCondCode(GECondCode))))))
+ return SDValue();
+
+ SwapIfNotLT(A0, A1, ACondCode);
+
+ unsigned OpCode =
+ isSignedIntSetCC(BCondCode) ? ISD::SSUBO_CARRY : ISD::USUBO_CARRY;
+ EVT AVT = A0.getValueType();
+ SDVTList AOverflowOpVT = DAG.getVTList(AVT, MVT::i1);
+ SDValue ACarry = DAG.getNode(OpCode, DL, AOverflowOpVT, A0, A1,
+ DAG.getConstant(0, DL, MVT::i1))
+ .getValue(1);
+
+ EVT BVT = B0.getValueType();
+ SDVTList BOverflowOpVT = DAG.getVTList(BVT, MVT::i1);
+ SDValue BCarry =
+ DAG.getNode(OpCode, DL, BOverflowOpVT, B0, B1, ACarry).getValue(1);
+
+ return BCarry;
+}
+
/// OR combines for which the commuted variant will be tried as well.
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
SDNode *N) {
@@ -7811,6 +7889,9 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
}
}
+ if (SDValue R = foldLogicOfICmpsToSubCarry(DAG, N0, N1, N))
+ return R;
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/subo-carry-from-icmps.ll b/llvm/test/CodeGen/AArch64/subo-carry-from-icmps.ll
index 3151ad55a39c63..4035536e6ea33e 100644
--- a/llvm/test/CodeGen/AArch64/subo-carry-from-icmps.ll
+++ b/llvm/test/CodeGen/AArch64/subo-carry-from-icmps.ll
@@ -4,13 +4,15 @@
define i1 @is_usub_overflow_i32_with_two_i16(i16 %a0, i16 %a1, i16 %b0, i16 %b1) {
; CHECK-LABEL: is_usub_overflow_i32_with_two_i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and w9, w0, #0xffff
-; CHECK-NEXT: and w8, w2, #0xffff
-; CHECK-NEXT: cmp w9, w1, uxth
-; CHECK-NEXT: cset w9, lo
-; CHECK-NEXT: cmp w8, w3, uxth
-; CHECK-NEXT: csel w8, wzr, w9, ne
-; CHECK-NEXT: csinc w0, w8, wzr, hs
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: sxth w9, w2
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: tst w8, #0xffff0000
+; CHECK-NEXT: sxth w8, w3
+; CHECK-NEXT: cset w10, ne
+; CHECK-NEXT: cmp wzr, w10
+; CHECK-NEXT: sbcs wzr, w9, w8
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
entry:
%carry.A = icmp ult i16 %a0, %a1
@@ -27,10 +29,8 @@ define i1 @is_usub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1)
; CHECK-LABEL: is_usub_overflow_i64_with_two_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: cset w8, lo
-; CHECK-NEXT: cmp w2, w3
-; CHECK-NEXT: csel w8, wzr, w8, ne
-; CHECK-NEXT: csinc w0, w8, wzr, hs
+; CHECK-NEXT: sbcs wzr, w2, w3
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
entry:
%carry.A = icmp ult i32 %a0, %a1
@@ -48,10 +48,8 @@ define i1 @is_usub_overflow_i128_with_two_i64(i64 %a0, i64 %a1, i64 %b0, i64 %b1
; CHECK-LABEL: is_usub_overflow_i128_with_two_i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: cset w8, lo
-; CHECK-NEXT: cmp x2, x3
-; CHECK-NEXT: csel w8, wzr, w8, ne
-; CHECK-NEXT: csinc w0, w8, wzr, hs
+; CHECK-NEXT: sbcs xzr, x2, x3
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
entry:
%carry.A = icmp ult i64 %a0, %a1
@@ -65,18 +63,16 @@ entry:
}
; check GE/UGE are supported too
-define i1 @i128_with_ge(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
-;
-; CHECK-LABEL: i128_with_ge:
+define i1 @i128_with_gt(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; CHECK-LABEL: i128_with_gt:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmp x1, x0
-; CHECK-NEXT: ccmp x2, x3, #0, hs
-; CHECK-NEXT: ccmp x3, x2, #2, ne
-; CHECK-NEXT: cset w0, hs
+; CHECK-NEXT: cmp x0, x1
+; CHECK-NEXT: sbcs xzr, x2, x3
+; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
entry:
- %carry.A = icmp uge i64 %a1, %a0
- %carry.B = icmp uge i64 %b1, %b0
+ %carry.A = icmp ugt i64 %a1, %a0
+ %carry.B = icmp ugt i64 %b1, %b0
%equal.B = icmp eq i64 %b0, %b1
; propagate carry only if B0 == B1
@@ -111,10 +107,10 @@ define i1 @is_ssub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1)
; CHECK-LABEL: is_ssub_overflow_i64_with_two_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: cset w8, lt
-; CHECK-NEXT: cmp w2, w3
-; CHECK-NEXT: csel w8, wzr, w8, ne
-; CHECK-NEXT: csinc w0, w8, wzr, ge
+; CHECK-NEXT: cset w8, vs
+; CHECK-NEXT: cmp wzr, w8
+; CHECK-NEXT: sbcs wzr, w2, w3
+; CHECK-NEXT: cset w0, vs
; CHECK-NEXT: ret
entry:
%carry.A = icmp slt i32 %a0, %a1
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index 1e9db9f55a8d5c..edb3e0cbe44796 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -676,17 +676,12 @@ define i1 @subcarry_ult_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
; https://github.com/llvm/llvm-project/commit/926e7312b2f20f2f7b0a3d5ddbd29da5625507f3
; This is also the result of "naive" implementation (x1 < y1) | ((x0 < y0) & (x1 == y1)).
; C source: https://godbolt.org/z/W1qqvqGbr
-; TODO: This should be optimized to cmp + sbb.
define i1 @subcarry_ult_2x64_2(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
; CHECK-LABEL: subcarry_ult_2x64_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpq %rdx, %rdi
-; CHECK-NEXT: setb %dl
-; CHECK-NEXT: cmpq %rcx, %rsi
-; CHECK-NEXT: setb %cl
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: andb %dl, %al
-; CHECK-NEXT: orb %cl, %al
+; CHECK-NEXT: sbbq %rcx, %rsi
+; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
entry:
%0 = icmp ult i64 %x0, %y0
diff --git a/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll b/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
index f421e3b2e03179..67ec0d1b600dcc 100644
--- a/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
+++ b/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
@@ -8,23 +8,15 @@ define i1 @is_usub_overflow_i32_with_two_i16(i16 %a0, i16 %a1, i16 %b0, i16 %b1)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpw {{[0-9]+}}(%esp), %cx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
-; X86-NEXT: setb %dl
-; X86-NEXT: sete %al
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: orb %dl, %al
+; X86-NEXT: sbbw {{[0-9]+}}(%esp), %ax
+; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: is_usub_overflow_i32_with_two_i16:
; X64: # %bb.0: # %entry
; X64-NEXT: cmpw %si, %di
-; X64-NEXT: setb %sil
-; X64-NEXT: cmpw %cx, %dx
-; X64-NEXT: setb %cl
-; X64-NEXT: sete %al
-; X64-NEXT: andb %sil, %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: sbbw %cx, %dx
+; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
%carry.A = icmp ult i16 %a0, %a1
@@ -43,23 +35,15 @@ define i1 @is_usub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setb %dl
-; X86-NEXT: sete %al
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: orb %dl, %al
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: is_usub_overflow_i64_with_two_i32:
; X64: # %bb.0: # %entry
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: setb %sil
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setb %cl
-; X64-NEXT: sete %al
-; X64-NEXT: andb %sil, %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: sbbl %ecx, %edx
+; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
%carry.A = icmp ult i32 %a0, %a1
@@ -75,51 +59,27 @@ entry:
define i1 @is_usub_overflow_i128_with_two_i64(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
; X86-LABEL: is_usub_overflow_i128_with_two_i64:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebx
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: pushl %edi
-; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %esi
-; X86-NEXT: .cfi_def_cfa_offset 16
-; X86-NEXT: .cfi_offset %esi, -16
-; X86-NEXT: .cfi_offset %edi, -12
-; X86-NEXT: .cfi_offset %ebx, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: setb %bl
-; X86-NEXT: cmpl %ecx, %eax
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: setb %bh
-; X86-NEXT: xorl %esi, %edx
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: andb %bl, %al
-; X86-NEXT: orb %bh, %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setb %al
; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 12
-; X86-NEXT: popl %edi
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: is_usub_overflow_i128_with_two_i64:
; X64: # %bb.0: # %entry
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: setb %sil
-; X64-NEXT: cmpq %rcx, %rdx
-; X64-NEXT: setb %cl
-; X64-NEXT: sete %al
-; X64-NEXT: andb %sil, %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: sbbq %rcx, %rdx
+; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
%carry.A = icmp ult i64 %a0, %a1
@@ -132,59 +92,35 @@ entry:
ret i1 %carry.B.with.borrow.A
}
-; check GE/UGE are supported too
-define i1 @i128_with_ge(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
-; X86-LABEL: i128_with_ge:
+; check GT/UGT are supported too
+define i1 @i128_with_gt(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: i128_with_gt:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebx
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: pushl %edi
-; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %esi
-; X86-NEXT: .cfi_def_cfa_offset 16
-; X86-NEXT: .cfi_offset %esi, -16
-; X86-NEXT: .cfi_offset %edi, -12
-; X86-NEXT: .cfi_offset %ebx, -8
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: setae %bl
-; X86-NEXT: cmpl %eax, %edx
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: sbbl %ecx, %edi
-; X86-NEXT: setae %bh
-; X86-NEXT: xorl %esi, %ecx
-; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: sete %al
-; X86-NEXT: andb %bl, %al
-; X86-NEXT: orb %bh, %al
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setb %al
; X86-NEXT: popl %esi
-; X86-NEXT: .cfi_def_cfa_offset 12
-; X86-NEXT: popl %edi
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
-; X64-LABEL: i128_with_ge:
+; X64-LABEL: i128_with_gt:
; X64: # %bb.0: # %entry
-; X64-NEXT: cmpq %rdi, %rsi
-; X64-NEXT: setae %sil
-; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: setae %cl
-; X64-NEXT: sete %al
-; X64-NEXT: andb %sil, %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: sbbq %rcx, %rdx
+; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
- %carry.A = icmp uge i64 %a1, %a0
- %carry.B = icmp uge i64 %b1, %b0
+ %carry.A = icmp ugt i64 %a1, %a0
+ %carry.B = icmp ugt i64 %b1, %b0
%equal.B = icmp eq i64 %b0, %b1
; propagate carry only if B0 == B1
@@ -233,23 +169,19 @@ define i1 @is_ssub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: setl %cl
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setl %dl
-; X86-NEXT: sete %al
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: orb %dl, %al
+; X86-NEXT: seto %cl
+; X86-NEXT: addb $-1, %cl
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: seto %al
; X86-NEXT: retl
;
; X64-LABEL: is_ssub_overflow_i64_with_two_i32:
; X64: # %bb.0: # %entry
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: setl %sil
-; X64-NEXT: cmpl %ecx, %edx
-; X64-NEXT: setl %cl
-; X64-NEXT: sete %al
-; X64-NEXT: andb %sil, %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: seto %al
+; X64-NEXT: addb $-1, %al
+; X64-NEXT: sbbl %ecx, %edx
+; X64-NEXT: seto %al
; X64-NEXT: retq
entry:
%carry.A = icmp slt i32 %a0, %a1
More information about the llvm-commits
mailing list