[llvm] 9a1cb8a - [X86] Add abds/abdu lowering for scalar i8/i16/i32/i64 types
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri May 5 03:50:05 PDT 2023
Author: Simon Pilgrim
Date: 2023-05-05T11:49:33+01:00
New Revision: 9a1cb8a856207cbcf7fa62bdeb26f22b089cc809
URL: https://github.com/llvm/llvm-project/commit/9a1cb8a856207cbcf7fa62bdeb26f22b089cc809
DIFF: https://github.com/llvm/llvm-project/commit/9a1cb8a856207cbcf7fa62bdeb26f22b089cc809.diff
LOG: [X86] Add abds/abdu lowering for scalar i8/i16/i32/i64 types
The next step will be to begin adding generic legalization/lowering support
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/abds.ll
llvm/test/CodeGen/X86/abdu.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0b1b9768f32e..5f9aec6bc5e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -229,6 +229,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
+  // Absolute difference.
+ for (auto Op : {ISD::ABDS, ISD::ABDU}) {
+ setOperationAction(Op , MVT::i8 , Custom);
+ setOperationAction(Op , MVT::i16 , Custom);
+ setOperationAction(Op , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(Op , MVT::i64 , Custom);
+ }
+
// Signed saturation subtraction.
setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
@@ -30389,6 +30398,21 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
+ if (VT.isScalarInteger()) {
+ unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u);
+ MVT WideVT = MVT::getIntegerVT(WideBits);
+ if (TLI.isTypeLegal(WideVT)) {
+ // abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))
+ // abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))
+ unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ LHS = DAG.getNode(ExtOpc, dl, WideVT, LHS);
+ RHS = DAG.getNode(ExtOpc, dl, WideVT, RHS);
+ SDValue Diff = DAG.getNode(ISD::SUB, dl, WideVT, LHS, RHS);
+ SDValue AbsDiff = DAG.getNode(ISD::ABS, dl, WideVT, Diff);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff);
+ }
+ }
+
// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index d5d5c8d2d9fb..c52736851651 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -20,15 +20,13 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
;
; X64-LABEL: abd_ext_i8:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: movsbq %dil, %rcx
-; X64-NEXT: movsbq %sil, %rax
-; X64-NEXT: subq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rcx, %rax
-; X64-NEXT: # kill: def $al killed $al killed $rax
+; X64-NEXT: movsbl %sil, %eax
+; X64-NEXT: movsbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = sext i8 %a to i64
%bext = sext i8 %b to i64
@@ -52,15 +50,13 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
;
; X64-LABEL: abd_ext_i8_undef:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: movsbq %dil, %rcx
-; X64-NEXT: movsbq %sil, %rax
-; X64-NEXT: subq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rcx, %rax
-; X64-NEXT: # kill: def $al killed $al killed $rax
+; X64-NEXT: movsbl %sil, %eax
+; X64-NEXT: movsbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = sext i8 %a to i64
%bext = sext i8 %b to i64
@@ -84,15 +80,13 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
;
; X64-LABEL: abd_ext_i16:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: movswq %di, %rcx
-; X64-NEXT: movswq %si, %rax
-; X64-NEXT: subq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rcx, %rax
-; X64-NEXT: # kill: def $ax killed $ax killed $rax
+; X64-NEXT: movswl %si, %eax
+; X64-NEXT: movswl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = sext i16 %a to i64
%bext = sext i16 %b to i64
@@ -116,15 +110,13 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
;
; X64-LABEL: abd_ext_i16_undef:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: movswq %di, %rcx
-; X64-NEXT: movswq %si, %rax
-; X64-NEXT: subq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rcx, %rax
-; X64-NEXT: # kill: def $ax killed $ax killed $rax
+; X64-NEXT: movswl %si, %eax
+; X64-NEXT: movswl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = sext i16 %a to i64
%bext = sext i16 %b to i64
@@ -137,25 +129,19 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: negl %edx
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: xorl %esi, %eax
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32:
; X64: # %bb.0:
-; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: movslq %esi, %rax
+; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
@@ -173,25 +159,19 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: negl %edx
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: xorl %esi, %eax
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32_undef:
; X64: # %bb.0:
-; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: movslq %esi, %rax
+; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
@@ -235,15 +215,10 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; X64-LABEL: abd_ext_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: movq %rsi, %rdx
-; X64-NEXT: sarq $63, %rdx
; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: sbbq %rdx, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: xorq %rcx, %rax
-; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: subq %rsi, %rdi
+; X64-NEXT: cmovgq %rdi, %rax
; X64-NEXT: retq
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -282,15 +257,10 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; X64-LABEL: abd_ext_i64_undef:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: movq %rsi, %rdx
-; X64-NEXT: sarq $63, %rdx
; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: sbbq %rdx, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: xorq %rcx, %rax
-; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: subq %rsi, %rdi
+; X64-NEXT: cmovgq %rdi, %rax
; X64-NEXT: retq
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -307,24 +277,23 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_minmax_i8:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpb %al, %cl
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovll %ecx, %edx
-; X86-NEXT: cmovgl %ecx, %eax
-; X86-NEXT: subb %dl, %al
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: cmovll %edi, %ecx
-; X64-NEXT: cmovgl %edi, %eax
-; X64-NEXT: subb %cl, %al
+; X64-NEXT: movsbl %sil, %eax
+; X64-NEXT: movsbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
@@ -336,24 +305,23 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_minmax_i16:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpw %ax, %cx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovll %ecx, %edx
-; X86-NEXT: cmovgl %ecx, %eax
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpw %ax, %di
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: cmovll %edi, %ecx
-; X64-NEXT: cmovgl %edi, %eax
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: movswl %si, %eax
+; X64-NEXT: movswl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -365,23 +333,24 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_minmax_i32:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovll %ecx, %edx
-; X86-NEXT: cmovgl %ecx, %eax
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: cmovll %edi, %ecx
-; X64-NEXT: cmovgl %edi, %eax
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: movslq %esi, %rax
+; X64-NEXT: movslq %edi, %rcx
+; X64-NEXT: subq %rax, %rcx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: cmovsq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -424,12 +393,11 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_minmax_i64:
; X64: # %bb.0:
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: movq %rsi, %rcx
-; X64-NEXT: cmovlq %rdi, %rcx
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: subq %rsi, %rdi
; X64-NEXT: cmovgq %rdi, %rax
-; X64-NEXT: subq %rcx, %rax
; X64-NEXT: retq
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 7095f7712082..f98daa1b68b5 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -20,13 +20,13 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
;
; X64-LABEL: abd_ext_i8:
; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: subq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rcx, %rax
-; X64-NEXT: # kill: def $al killed $al killed $rax
+; X64-NEXT: movzbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -50,13 +50,13 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
;
; X64-LABEL: abd_ext_i8_undef:
; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: subq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rcx, %rax
-; X64-NEXT: # kill: def $al killed $al killed $rax
+; X64-NEXT: movzbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -80,13 +80,13 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
;
; X64-LABEL: abd_ext_i16:
; X64: # %bb.0:
-; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movzwl %si, %eax
-; X64-NEXT: subq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rcx, %rax
-; X64-NEXT: # kill: def $ax killed $ax killed $rax
+; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@@ -110,13 +110,13 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
;
; X64-LABEL: abd_ext_i16_undef:
; X64: # %bb.0:
-; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movzwl %si, %eax
-; X64-NEXT: subq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rcx, %rax
-; X64-NEXT: # kill: def $ax killed $ax killed $rax
+; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@@ -129,19 +129,19 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: negl %edx
; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovbel %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl %edi, %ecx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
@@ -159,19 +159,19 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32_undef:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: negl %edx
; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovbel %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32_undef:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl %edi, %ecx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
@@ -209,12 +209,10 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; X64-LABEL: abd_ext_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: sbbq %rcx, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: xorq %rcx, %rax
-; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: subq %rsi, %rdi
+; X64-NEXT: cmovaq %rdi, %rax
; X64-NEXT: retq
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -247,12 +245,10 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; X64-LABEL: abd_ext_i64_undef:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: sbbq %rcx, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: xorq %rcx, %rax
-; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: subq %rsi, %rdi
+; X64-NEXT: cmovaq %rdi, %rax
; X64-NEXT: retq
%aext = zext i64 %a to i128
%bext = zext i64 %b to i128
@@ -269,24 +265,23 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; X86-LABEL: abd_minmax_i8:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpb %al, %cl
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovbl %ecx, %edx
-; X86-NEXT: cmoval %ecx, %eax
-; X86-NEXT: subb %dl, %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpb %al, %dil
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: cmovbl %edi, %ecx
-; X64-NEXT: cmoval %edi, %eax
-; X64-NEXT: subb %cl, %al
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: movzbl %dil, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
@@ -298,24 +293,23 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; X86-LABEL: abd_minmax_i16:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpw %ax, %cx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovbl %ecx, %edx
-; X86-NEXT: cmoval %ecx, %eax
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: cmovsl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpw %ax, %di
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: cmovbl %edi, %ecx
-; X64-NEXT: cmoval %edi, %eax
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: subl %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: cmovsl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
@@ -327,23 +321,24 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_minmax_i32:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
-; X86-NEXT: cmovbl %ecx, %edx
-; X86-NEXT: cmoval %ecx, %eax
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovbel %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_minmax_i32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
-; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: cmovbl %edi, %ecx
-; X64-NEXT: cmoval %edi, %eax
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: subq %rax, %rcx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: cmovsq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
@@ -386,12 +381,11 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_minmax_i64:
; X64: # %bb.0:
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: movq %rsi, %rcx
-; X64-NEXT: cmovbq %rdi, %rcx
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: subq %rsi, %rdi
; X64-NEXT: cmovaq %rdi, %rax
-; X64-NEXT: subq %rcx, %rax
; X64-NEXT: retq
%min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
More information about the llvm-commits
mailing list