[llvm] (Draft) [DAG] Combine icmps into subo_carry (PR #106291)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 14:16:49 PDT 2024


https://github.com/v01dXYZ created https://github.com/llvm/llvm-project/pull/106291

Fixes #106118 
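
The combine folds the limb-wise form of a wide compare into a subtract-with-borrow chain (USUBO_CARRY / SSUBO_CARRY). As a rough sketch (illustrative only, mirroring is_usub_overflow_i64_with_two_i32 from the added test), the targeted pattern corresponds to source along these lines:

  // Illustrative only: a 64-bit unsigned "less than" built from 32-bit limbs,
  // which lowers to the icmp/and/or shape matched by the new combine
  // (a0/a1 are the low limbs, b0/b1 the high limbs).
  #include <cstdint>

  bool is_usub_overflow(uint32_t a0, uint32_t a1,   // low limbs
                        uint32_t b0, uint32_t b1) { // high limbs
    bool carryA = a0 < a1;  // borrow out of the low limbs
    bool carryB = b0 < b1;  // borrow out of the high limbs
    bool equalB = b0 == b1; // the low borrow only matters if the high limbs tie
    return carryB | (carryA & equalB);
  }

With the combine, the X86 checks in the test collapse this to cmp + sbb + setb.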

>From f63c4b9a0bb4028857d5e050678090dc16493b93 Mon Sep 17 00:00:00 2001
From: v01dxyz <v01dxyz at v01d.xyz>
Date: Tue, 27 Aug 2024 23:16:00 +0200
Subject: [PATCH] (Draft) [DAG] Combine icmps into subo_carry

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  53 ++++++
 .../test/CodeGen/X86/subo-carry-from-icmps.ll | 164 ++++++++++++++++++
 2 files changed, 217 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/subo-carry-from-icmps.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b27f06f94ff0e7..3f840cbab8bf66 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7811,6 +7811,59 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
     }
   }
 
+  // Fold a limb-wise wide compare (A0/A1 low limbs, B0/B1 high limbs)
+  // into a subtract-with-borrow chain. For example, combine:
+  //
+  //   (or (setcc B0, B1, ult)
+  //       (and (setcc A0, A1, ult)
+  //            (setcc B0, B1, eq)))
+  //
+  // into:
+  //
+  //   (usubo_carry B0, B1, (usubo_carry A0, A1, 0):1):1
+  //
+  // Signed comparisons use SSUBO_CARRY instead of USUBO_CARRY.
+  SDValue B0;
+  SDValue B1;
+  SDValue A0;
+  SDValue A1;
+  SDValue BEq;
+  ISD::CondCode BCondCode;
+  ISD::CondCode ACondCode;
+  if (sd_match(N0, m_SetCC(m_Value(B0), m_Value(B1), m_CondCode(BCondCode))) &&
+      (BCondCode != ISD::SETNE && BCondCode != ISD::SETEQ) &&
+      sd_match(N1,
+               m_And(m_SetCC(m_Value(A0), m_Value(A1), m_CondCode(ACondCode)),
+                     m_Value(BEq))) &&
+      (ACondCode != ISD::SETNE && ACondCode != ISD::SETEQ) &&
+      isSignedIntSetCC(BCondCode) == isSignedIntSetCC(ACondCode) &&
+      // TODO: Support other CondCodes, though it would increase code complexity.
+      sd_match(BEq, m_c_SetCC(m_Specific(B0), m_Specific(B1),
+                              m_SpecificCondCode(ISD::SETEQ)))) {
+
+    if (isTrueWhenEqual(BCondCode))
+      std::swap(B0, B1);
+
+    if (isTrueWhenEqual(ACondCode))
+      std::swap(A0, A1);
+
+    unsigned OpCode = isSignedIntSetCC(BCondCode)
+                          ? ISD::SSUBO_CARRY
+                          : ISD::USUBO_CARRY;
+
+    EVT AVT = A0.getValueType();
+    SDVTList AOverflowOpVT = DAG.getVTList(AVT, MVT::i1);
+    SDValue ACarry = DAG.getNode(OpCode, DL, AOverflowOpVT, A0, A1,
+                                 DAG.getConstant(0, DL, MVT::i1))
+                         .getValue(1);
+
+    EVT BVT = B0.getValueType();
+    SDVTList BOverflowOpVT = DAG.getVTList(BVT, MVT::i1);
+    SDValue BCarry =
+        DAG.getNode(OpCode, DL, BOverflowOpVT, B0, B1, ACarry).getValue(1);
+    return BCarry;
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll b/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
new file mode 100644
index 00000000000000..8fa64fb0d7c437
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subo-carry-from-icmps.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+
+define i1 @is_usub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1) {
+; X86-LABEL: is_usub_overflow_i64_with_two_i32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    setb %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: is_usub_overflow_i64_with_two_i32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cmpl %esi, %edi
+; X64-NEXT:    sbbl %ecx, %edx
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+entry:
+  %carry.A = icmp ult i32 %a0, %a1
+  %carry.B = icmp ult i32 %b0, %b1
+  %equal.B = icmp eq i32 %b0, %b1
+
+  %is.carry.A.propagated = and i1 %carry.A, %equal.B
+  %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+  ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_usub_overflow_i128_with_two_i64(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: is_usub_overflow_i128_with_two_i64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    setb %al
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: is_usub_overflow_i128_with_two_i64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cmpq %rsi, %rdi
+; X64-NEXT:    sbbq %rcx, %rdx
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+entry:
+  %carry.A = icmp ult i64 %a0, %a1
+  %carry.B = icmp ult i64 %b0, %b1
+  %equal.B = icmp eq i64 %b0, %b1
+
+  %is.carry.A.propagated = and i1 %carry.A, %equal.B
+  %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+  ret i1 %carry.B.with.borrow.A
+}
+
+; check that UGE is supported too
+define i1 @i128_with_ge(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: i128_with_ge:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    setb %al
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: i128_with_ge:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cmpq %rsi, %rdi
+; X64-NEXT:    sbbq %rcx, %rdx
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+entry:
+  %carry.A = icmp uge i64 %a1, %a0
+  %carry.B = icmp uge i64 %b1, %b0
+  %equal.B = icmp eq i64 %b0, %b1
+
+  ; propagate carry only if B0 == B1
+  %is.carry.A.propagated = and i1 %carry.A, %equal.B
+  %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+  ret i1 %carry.B.with.borrow.A
+}
+
+; do not combine if the condition guarding the propagation of carry.A is not EQ
+define i1 @nothing_if_not_eq(i64 %a0, i64 %a1, i64 %b0, i64 %b1) {
+; X86-LABEL: nothing_if_not_eq:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: nothing_if_not_eq:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cmpq %rdi, %rsi
+; X64-NEXT:    setae %sil
+; X64-NEXT:    cmpq %rdx, %rcx
+; X64-NEXT:    setae %cl
+; X64-NEXT:    setae %al
+; X64-NEXT:    andb %sil, %al
+; X64-NEXT:    orb %cl, %al
+; X64-NEXT:    retq
+entry:
+  %carry.A = icmp uge i64 %a1, %a0
+  %carry.B = icmp uge i64 %b1, %b0
+  %equal.B = icmp ule i64 %b0, %b1
+
+  ; propagate carry only if B0 == B1
+  %is.carry.A.propagated = and i1 %carry.A, %equal.B
+  %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+  ret i1 %carry.B.with.borrow.A
+}
+
+define i1 @is_ssub_overflow_i64_with_two_i32(i32 %a0, i32 %a1, i32 %b0, i32 %b1) {
+; X86-LABEL: is_ssub_overflow_i64_with_two_i32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    setb %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: is_ssub_overflow_i64_with_two_i32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cmpl %esi, %edi
+; X64-NEXT:    sbbl %ecx, %edx
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+entry:
+  %carry.A = icmp slt i32 %a0, %a1
+  %carry.B = icmp slt i32 %b0, %b1
+  %equal.B = icmp eq i32 %b0, %b1
+
+  %is.carry.A.propagated = and i1 %carry.A, %equal.B
+  %carry.B.with.borrow.A = or i1 %carry.B, %is.carry.A.propagated
+
+  ret i1 %carry.B.with.borrow.A
+}
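
For completeness, a standalone sanity check (not part of the patch; assumes 32-bit limbs as in the tests) that the matched boolean expression really is the borrow of the wide subtraction:

  // Sampled check: (b0 < b1) | ((a0 < a1) & (b0 == b1)) equals the unsigned
  // comparison of the assembled 64-bit values, i.e. the borrow of x - y.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t samples[] = {0u, 1u, 2u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
    for (uint32_t a0 : samples)
      for (uint32_t a1 : samples)
        for (uint32_t b0 : samples)
          for (uint32_t b1 : samples) {
            uint64_t x = (uint64_t(b0) << 32) | a0; // (high:low) = (b0:a0)
            uint64_t y = (uint64_t(b1) << 32) | a1; // (high:low) = (b1:a1)
            bool limbwise = (b0 < b1) | ((a0 < a1) & (b0 == b1));
            assert(limbwise == (x < y));
          }
    return 0;
  }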



More information about the llvm-commits mailing list