[llvm] b25b9a7 - [DAG] visitSELECT - add "select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)" fold (and neg equivalent)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 4 03:59:30 PDT 2024
Author: Simon Pilgrim
Date: 2024-09-04T11:59:10+01:00
New Revision: b25b9a7d6c872e42121aa024f362fae0b15dd72c
URL: https://github.com/llvm/llvm-project/commit/b25b9a7d6c872e42121aa024f362fae0b15dd72c
DIFF: https://github.com/llvm/llvm-project/commit/b25b9a7d6c872e42121aa024f362fae0b15dd72c.diff
LOG: [DAG] visitSELECT - add "select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)" fold (and neg equivalent)
Handle cases where CodeGenPrepare (CGP) has merged the CMP+SUB into a USUBO node; this improves a few outstanding niggles from #100810.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/abdu-neg.ll
llvm/test/CodeGen/X86/abdu.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b0a906743f29ff..6390231341f96d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11719,6 +11719,24 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
N2_2, Flags);
}
}
+
+ // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
+ if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
+ N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
+ N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
+ N2.getOperand(1) == N1.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
+ return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
+
+ // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
+ if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
+ N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
+ N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
+ N2.getOperand(1) == N1.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
+ return DAG.getNegative(
+ DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
+ DL, VT);
}
// Fold selects based on a setcc into other things, such as min/max/abs.
@@ -11776,6 +11794,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SelectNode;
}
+ if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
+ return ABD;
+
if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
return NewSel;
}
diff --git a/llvm/test/CodeGen/X86/abdu-neg.ll b/llvm/test/CodeGen/X86/abdu-neg.ll
index 507f7681400ef2..24962be43b5cfd 100644
--- a/llvm/test/CodeGen/X86/abdu-neg.ll
+++ b/llvm/test/CodeGen/X86/abdu-neg.ll
@@ -751,27 +751,23 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_cmp_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: subw %dx, %si
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
-; X86-NEXT: cmpw %dx, %cx
-; X86-NEXT: cmovbl %esi, %eax
+; X86-NEXT: cmovnsl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: subw %si, %cx
+; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: negl %eax
-; X64-NEXT: cmpw %si, %di
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: cmovnsl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%cmp = icmp ult i16 %a, %b
@@ -811,33 +807,30 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_cmp_i64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: subl %eax, %edi
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: cmovael %edi, %eax
-; X86-NEXT: cmovael %ebx, %edx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: negl %eax
+; X86-NEXT: sbbl %ecx, %edx
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: subq %rdi, %rsi
+; X64-NEXT: cmovaeq %rsi, %rax
; X64-NEXT: negq %rax
-; X64-NEXT: subq %rsi, %rdi
-; X64-NEXT: cmovbq %rdi, %rax
; X64-NEXT: retq
%cmp = icmp ult i64 %a, %b
%ab = sub i64 %a, %b
@@ -853,34 +846,36 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: subl %edx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %ecx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: xorl %edi, %edi
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload
-; X86-NEXT: cmovael %ebx, %esi
-; X86-NEXT: cmovael %ebp, %ecx
-; X86-NEXT: cmovael %eax, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl $0, %ebp
+; X86-NEXT: sbbl %ebp, %ebp
+; X86-NEXT: xorl %ebp, %ecx
+; X86-NEXT: xorl %ebp, %esi
+; X86-NEXT: xorl %ebp, %ebx
+; X86-NEXT: xorl %ebp, %edx
+; X86-NEXT: subl %ebp, %edx
+; X86-NEXT: sbbl %ebp, %ebx
+; X86-NEXT: sbbl %ebp, %esi
+; X86-NEXT: sbbl %ebp, %ecx
+; X86-NEXT: negl %edx
+; X86-NEXT: movl $0, %ebp
+; X86-NEXT: sbbl %ebx, %ebp
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: sbbl %ecx, %edi
; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl %ebp, 4(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -889,15 +884,19 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
;
; X64-LABEL: abd_cmp_i128:
; X64: # %bb.0:
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: movq %rcx, %r8
-; X64-NEXT: sbbq %rsi, %r8
-; X64-NEXT: subq %rdx, %rdi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: xorl %edi, %edi
+; X64-NEXT: subq %rdx, %rax
; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: cmovbq %rdi, %rax
-; X64-NEXT: cmovbq %rsi, %r8
-; X64-NEXT: movq %r8, %rdx
+; X64-NEXT: movl $0, %ecx
+; X64-NEXT: sbbq %rcx, %rcx
+; X64-NEXT: xorq %rcx, %rsi
+; X64-NEXT: xorq %rcx, %rax
+; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: sbbq %rcx, %rsi
+; X64-NEXT: negq %rax
+; X64-NEXT: sbbq %rsi, %rdi
+; X64-NEXT: movq %rdi, %rdx
; X64-NEXT: retq
%cmp = icmp ult i128 %a, %b
%ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 290894d2712e84..3bee81b61b98a3 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -608,25 +608,21 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subb %cl, %dl
-; X86-NEXT: negb %dl
-; X86-NEXT: subb %cl, %al
-; X86-NEXT: movzbl %al, %ecx
-; X86-NEXT: movzbl %dl, %eax
-; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: subb %dil, %al
-; X64-NEXT: negb %al
-; X64-NEXT: subb %dil, %sil
+; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movzbl %sil, %ecx
-; X64-NEXT: movzbl %al, %eax
-; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%cmp = icmp ugt i8 %a, %b
@@ -670,9 +666,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: retl
@@ -681,9 +676,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: cmovael %edi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovael %esi, %eax
; X64-NEXT: retq
%cmp = icmp ult i32 %a, %b
%ab = sub i32 %a, %b
More information about the llvm-commits mailing list