[llvm] (Draft) [DAG] Combine icmps into subo_carry (PR #106291)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 14:16:49 PDT 2024
https://github.com/v01dXYZ created https://github.com/llvm/llvm-project/pull/106291
Fixes #106118
>From f63c4b9a0bb4028857d5e050678090dc16493b93 Mon Sep 17 00:00:00 2001
From: v01dxyz <v01dxyz at v01d.xyz>
Date: Tue, 27 Aug 2024 23:16:00 +0200
Subject: [PATCH] (Draft) [DAG] Combine icmps into subo_carry
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 53 ++++++
.../test/CodeGen/X86/subo-carry-from-icmps.ll | 164 ++++++++++++++++++
2 files changed, 217 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b27f06f94ff0e7..3f840cbab8bf66 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7811,6 +7811,59 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
}
}
+ /* TMP comment
+
+ For example, it should combine the pattern below into the form after the arrow:
+
+ (or (icmp ult B0 B1)
+ (and (icmp ult A0 A1)
+ (icmp eq B0 B1)))
+
+ -->
+
+ (usubo_carry B0 B1 (usubo_carry A0 A1 0):1):1
+ */
+ SDValue B0;
+ SDValue B1;
+ SDValue A0;
+ SDValue A1;
+ SDValue BEq;
+ ISD::CondCode BCondCode;
+ ISD::CondCode ACondCode;
+ if (sd_match(N0, m_SetCC(m_Value(B0), m_Value(B1), m_CondCode(BCondCode))) &&
+ (BCondCode != ISD::SETNE && BCondCode != ISD::SETEQ) &&
+ sd_match(N1,
+ m_And(m_SetCC(m_Value(A0), m_Value(A1), m_CondCode(ACondCode)),
+ m_Value(BEq))) &&
+ (ACondCode != ISD::SETNE && ACondCode != ISD::SETEQ) &&
+ isSignedIntSetCC(BCondCode) == isSignedIntSetCC(ACondCode) &&
+ // TODO: Consider handling other CondCodes, at the cost of more complex code.
+ sd_match(BEq, m_c_SetCC(m_Specific(B0), m_Specific(B1),
+ m_SpecificCondCode(ISD::SETEQ)))) {
+
+ if (isTrueWhenEqual(BCondCode))
+ std::swap(B0, B1);
+
+ if (isTrueWhenEqual(ACondCode))
+ std::swap(A0, A1);
+
+ unsigned OpCode = BCondCode == isSignedIntSetCC(BCondCode)
+ ? ISD::SSUBO_CARRY
+ : ISD::USUBO_CARRY;
+
+ EVT AVT = A0.getValueType();
+ SDVTList AOverflowOpVT = DAG.getVTList(AVT, MVT::i1);
+ SDValue ACarry = DAG.getNode(OpCode, DL, AOverflowOpVT, A0, A1,
+ DAG.getConstant(0, DL, MVT::i1))
+ .getValue(1);
+
+ EVT BVT = B0.getValueType();
+ SDVTList BOverflowOpVT = DAG.getVTList(BVT, MVT::i1);
+ SDValue BCarry =
+ DAG.getNode(OpCode, DL, BOverflowOpVT, B0, B1, ACarry).getValue(1);
+ return BCarry;
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll b/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
new file mode 100644
index 00000000000000..8fa64fb0d7c437
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+
+define i1 @is_usub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1) {
+; X86-LABEL: is_usub_overflow_i64_with_two_i32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_usub_overflow_i64_with_two_i32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: sbbl %ecx, %edx
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp ult i32 %a0, %a1
+ %carry.B = icmp ult i32 %b0, %b1
+ %equal.B = icmp eq i32 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_usub_overflow_i128_with_two_i64(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: is_usub_overflow_i128_with_two_i64:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setb %al
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: is_usub_overflow_i128_with_two_i64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: sbbq %rcx, %rdx
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp ult i64 %a0, %a1
+ %carry.B = icmp ult i64 %b0, %b1
+ %equal.B = icmp eq i64 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+; Check that GE/UGE are supported too.
+define i1 @i128_with_ge(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: i128_with_ge:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setb %al
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; X64-LABEL: i128_with_ge:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: sbbq %rcx, %rdx
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp uge i64 %a1, %a0
+ %carry.B = icmp uge i64 %b1, %b0
+ %equal.B = icmp eq i64 %b0, %b1
+
+ ; propagate carry only if B0 == B1
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+; Do not combine if the condition that propagates carry.A is not an EQ comparison.
+define i1 @nothing_if_not_eq(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: nothing_if_not_eq:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setae %al
+; X86-NEXT: retl
+;
+; X64-LABEL: nothing_if_not_eq:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpq %rdi, %rsi
+; X64-NEXT: setae %sil
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: setae %cl
+; X64-NEXT: setae %al
+; X64-NEXT: andb %sil, %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp uge i64 %a1, %a0
+ %carry.B = icmp uge i64 %b1, %b0
+ %equal.B = icmp ule i64 %b0, %b1
+
+ ; carry.A is propagated when B0 <= B1 (ule, deliberately not an eq check)
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_ssub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1) {
+; X86-LABEL: is_ssub_overflow_i64_with_two_i32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_ssub_overflow_i64_with_two_i32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: sbbl %ecx, %edx
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+entry:
+ %carry.A = icmp slt i32 %a0, %a1
+ %carry.B = icmp slt i32 %b0, %b1
+ %equal.B = icmp eq i32 %b0, %b1
+
+ %is.carry.A.propagated = and i1 %carry.A, %equal.B
+ %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+ ret i1 %carry.B.with.borrow.A
+}
More information about the llvm-commits
mailing list