[llvm] 6ec889e - [DAG] Add support for neg(abd(x,y)) patterns.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 05:16:26 PDT 2024
Author: Simon Pilgrim
Date: 2024-09-06T13:16:09+01:00
New Revision: 6ec889e53f1ae048cc9aee79c91118fc8bbc6974
URL: https://github.com/llvm/llvm-project/commit/6ec889e53f1ae048cc9aee79c91118fc8bbc6974
DIFF: https://github.com/llvm/llvm-project/commit/6ec889e53f1ae048cc9aee79c91118fc8bbc6974.diff
LOG: [DAG] Add support for neg(abd(x,y)) patterns.
Currently limited to cases which have legal/custom ABDS/ABDU handling - I'll extend this to all targets in the future (similar to how we support neg(abs(x))) once I've addressed some outstanding regressions on aarch64/riscv.
Helps avoid a lot of extra cmov instructions on x86 in particular, and allows us to more easily improve the codegen in future commits.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
llvm/test/CodeGen/X86/abds-neg.ll
llvm/test/CodeGen/X86/abdu-neg.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 37272a09b336ab..bb907633e1f824 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4103,12 +4103,24 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
return DAG.getNode(ISD::ABDS, DL, VT, A, B);
+ // smin(a,b) - smax(a,b) --> neg(abds(a,b))
+ if (hasOperation(ISD::ABDS, VT) &&
+ sd_match(N0, m_SMin(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_SMax(m_Specific(A), m_Specific(B))))
+ return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
+
// umax(a,b) - umin(a,b) --> abdu(a,b)
if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
return DAG.getNode(ISD::ABDU, DL, VT, A, B);
+ // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
+ if (hasOperation(ISD::ABDU, VT) &&
+ sd_match(N0, m_UMin(m_Value(A), m_Value(B))) &&
+ sd_match(N1, m_UMax(m_Specific(A), m_Specific(B))))
+ return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
+
return SDValue();
}
@@ -11605,6 +11617,10 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
+ sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
+ hasOperation(ABDOpc, VT))
+ return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
break;
case ISD::SETLT:
case ISD::SETLE:
@@ -11613,6 +11629,10 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
+ sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
+ hasOperation(ABDOpc, VT))
+ return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
break;
default:
break;
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 64a2e89e34811e..b540948b20f756 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -1933,13 +1933,19 @@ define <16 x i8> @absd_int8_sle(<16 x i8>, <16 x i8>) {
; some cases we are unable to optimize
; check whether goes beyond the scope
define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
-; CHECK-LABEL: absd_int32_ugt_opp:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcmpgtuw v4, v2, v3
-; CHECK-NEXT: vsubuwm v5, v2, v3
-; CHECK-NEXT: vsubuwm v2, v3, v2
-; CHECK-NEXT: xxsel v2, v5, v2, v4
-; CHECK-NEXT: blr
+; CHECK-PWR9-LABEL: absd_int32_ugt_opp:
+; CHECK-PWR9: # %bb.0:
+; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
+; CHECK-PWR9-NEXT: vnegw v2, v2
+; CHECK-PWR9-NEXT: blr
+;
+; CHECK-PWR78-LABEL: absd_int32_ugt_opp:
+; CHECK-PWR78: # %bb.0:
+; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3
+; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
+; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
+; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
+; CHECK-PWR78-NEXT: blr
%3 = icmp ugt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
index f837f49abf7a4f..b22c8c54e89a64 100644
--- a/llvm/test/CodeGen/X86/abds-neg.ll
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -522,23 +522,25 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_minmax_i8:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpb %cl, %dl
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovll %edx, %eax
-; X86-NEXT: cmovgl %edx, %ecx
-; X86-NEXT: subb %cl, %al
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: negb %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i8:
; X64: # %bb.0:
-; X64-NEXT: cmpb %sil, %dil
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: cmovgl %edi, %esi
-; X64-NEXT: subb %sil, %al
+; X64-NEXT: movsbl %sil, %eax
+; X64-NEXT: movsbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: negb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
@@ -550,23 +552,23 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_minmax_i16:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpw %cx, %dx
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovll %edx, %eax
-; X86-NEXT: cmovgl %edx, %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovnsl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i16:
; X64: # %bb.0:
-; X64-NEXT: cmpw %si, %di
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: cmovgl %edi, %esi
-; X64-NEXT: subl %esi, %eax
+; X64-NEXT: movswl %si, %eax
+; X64-NEXT: movswl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovnsl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -578,22 +580,22 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_minmax_i32:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl %ecx, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovll %edx, %eax
-; X86-NEXT: cmovgl %edx, %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovll %edx, %eax
+; X86-NEXT: negl %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i32:
; X64: # %bb.0:
-; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: cmovgl %edi, %esi
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovgel %esi, %eax
+; X64-NEXT: negl %eax
; X64-NEXT: retq
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -634,11 +636,11 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_minmax_i64:
; X64: # %bb.0:
-; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: cmovlq %rdi, %rax
-; X64-NEXT: cmovgq %rdi, %rsi
+; X64-NEXT: movq %rdi, %rax
; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: subq %rdi, %rsi
+; X64-NEXT: cmovgeq %rsi, %rax
+; X64-NEXT: negq %rax
; X64-NEXT: retq
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -736,27 +738,25 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_cmp_i8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subb %cl, %dl
-; X86-NEXT: negb %dl
-; X86-NEXT: subb %cl, %al
-; X86-NEXT: movzbl %al, %ecx
-; X86-NEXT: movzbl %dl, %eax
-; X86-NEXT: cmovlel %ecx, %eax
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: negb %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subb %sil, %al
+; X64-NEXT: movsbl %sil, %eax
+; X64-NEXT: movsbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
; X64-NEXT: negb %al
-; X64-NEXT: subb %sil, %dil
-; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: movzbl %al, %eax
-; X64-NEXT: cmovlel %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%cmp = icmp sle i8 %a, %b
@@ -769,27 +769,23 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_cmp_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: subw %dx, %si
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
-; X86-NEXT: cmpw %dx, %cx
-; X86-NEXT: cmovll %esi, %eax
+; X86-NEXT: cmovnsl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: subw %si, %cx
+; X64-NEXT: movswl %si, %eax
+; X64-NEXT: movswl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: negl %eax
-; X64-NEXT: cmpw %si, %di
-; X64-NEXT: cmovll %ecx, %eax
+; X64-NEXT: cmovnsl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%cmp = icmp slt i16 %a, %b
@@ -804,20 +800,20 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovgel %edx, %eax
+; X86-NEXT: cmovll %edx, %eax
+; X86-NEXT: negl %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovgel %esi, %eax
; X64-NEXT: negl %eax
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: cmovll %edi, %eax
; X64-NEXT: retq
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -853,9 +849,9 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: subq %rdi, %rsi
+; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: negq %rax
-; X64-NEXT: subq %rsi, %rdi
-; X64-NEXT: cmovlq %rdi, %rax
; X64-NEXT: retq
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/X86/abdu-neg.ll b/llvm/test/CodeGen/X86/abdu-neg.ll
index 24962be43b5cfd..f9da1a9e04530f 100644
--- a/llvm/test/CodeGen/X86/abdu-neg.ll
+++ b/llvm/test/CodeGen/X86/abdu-neg.ll
@@ -504,23 +504,25 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_minmax_i8:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpb %cl, %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovbl %edx, %eax
-; X86-NEXT: cmoval %edx, %ecx
-; X86-NEXT: subb %cl, %al
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: negb %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i8:
; X64: # %bb.0:
-; X64-NEXT: cmpb %sil, %dil
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmovbl %edi, %eax
-; X64-NEXT: cmoval %edi, %esi
-; X64-NEXT: subb %sil, %al
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: movzbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: negb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@@ -532,23 +534,23 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_minmax_i16:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpw %cx, %dx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovbl %edx, %eax
-; X86-NEXT: cmoval %edx, %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovnsl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i16:
; X64: # %bb.0:
-; X64-NEXT: cmpw %si, %di
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmovbl %edi, %eax
-; X64-NEXT: cmoval %edi, %esi
-; X64-NEXT: subl %esi, %eax
+; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovnsl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@@ -560,22 +562,22 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_minmax_i32:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl %ecx, %edx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: cmovbl %edx, %eax
-; X86-NEXT: cmoval %edx, %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovbl %edx, %eax
+; X86-NEXT: negl %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i32:
; X64: # %bb.0:
-; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmovbl %edi, %eax
-; X64-NEXT: cmoval %edi, %esi
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovael %esi, %eax
+; X64-NEXT: negl %eax
; X64-NEXT: retq
%min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -616,11 +618,11 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_minmax_i64:
; X64: # %bb.0:
-; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: cmovbq %rdi, %rax
-; X64-NEXT: cmovaq %rdi, %rsi
+; X64-NEXT: movq %rdi, %rax
; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: subq %rdi, %rsi
+; X64-NEXT: cmovaeq %rsi, %rax
+; X64-NEXT: negq %rax
; X64-NEXT: retq
%min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
@@ -720,25 +722,23 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subb %cl, %dl
-; X86-NEXT: negb %dl
-; X86-NEXT: subb %cl, %al
-; X86-NEXT: movzbl %al, %ecx
-; X86-NEXT: movzbl %dl, %eax
-; X86-NEXT: cmovbel %ecx, %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: negb %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subb %sil, %al
-; X64-NEXT: negb %al
-; X64-NEXT: subb %sil, %dil
+; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: movzbl %al, %eax
-; X64-NEXT: cmovbel %ecx, %eax
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: negb %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%cmp = icmp ule i8 %a, %b
@@ -782,20 +782,20 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovael %edx, %eax
+; X86-NEXT: cmovbl %edx, %eax
+; X86-NEXT: negl %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: subl %esi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovael %esi, %eax
; X64-NEXT: negl %eax
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: cmovbl %edi, %eax
; X64-NEXT: retq
%cmp = icmp uge i32 %a, %b
%ab = sub i32 %a, %b
More information about the llvm-commits
mailing list