[llvm] 96602c3 - [X86] Add negated abds/abdu test coverage - based off #100810

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 5 05:53:03 PDT 2024


Author: Simon Pilgrim
Date: 2024-08-05T13:43:39+01:00
New Revision: 96602c3ad504a26d4fb8b5ca73e878a7206ab26c

URL: https://github.com/llvm/llvm-project/commit/96602c3ad504a26d4fb8b5ca73e878a7206ab26c
DIFF: https://github.com/llvm/llvm-project/commit/96602c3ad504a26d4fb8b5ca73e878a7206ab26c.diff

LOG: [X86] Add negated abds/abdu test coverage - based off #100810

Added: 
    llvm/test/CodeGen/X86/abds-neg.ll
    llvm/test/CodeGen/X86/abdu-neg.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
new file mode 100644
index 0000000000000..753b1519c8f90
--- /dev/null
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -0,0 +1,1312 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=X64
+
+;
+; trunc(nabs(sub(sext(a),sext(b)))) -> nabds(a,b)
+;
+
+define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_ext_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovsl %ecx, %eax
+; X86-NEXT:    negb %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovsl %ecx, %eax
+; X64-NEXT:    negb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %aext = sext i8 %a to i64
+  %bext = sext i8 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i8
+  ret i8 %trunc
+}
+
+define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
+; X86-LABEL: abd_ext_i8_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovsl %ecx, %eax
+; X86-NEXT:    negb %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i8_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movswl %si, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovsl %ecx, %eax
+; X64-NEXT:    negb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %aext = sext i8 %a to i64
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i8
+  ret i8 %trunc
+}
+
+define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_ext_i8_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovsl %ecx, %eax
+; X86-NEXT:    negb %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i8_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovsl %ecx, %eax
+; X64-NEXT:    negb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %aext = sext i8 %a to i64
+  %bext = sext i8 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i8
+  ret i8 %trunc
+}
+
+define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_ext_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovnsl %ecx, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movswl %si, %eax
+; X64-NEXT:    movswl %di, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovnsl %ecx, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %aext = sext i16 %a to i64
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i16
+  ret i16 %trunc
+}
+
+define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
+; X86-LABEL: abd_ext_i16_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovlel %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i16_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    movswq %di, %rcx
+; X64-NEXT:    movslq %esi, %rax
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $rax
+; X64-NEXT:    retq
+  %aext = sext i16 %a to i64
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i16
+  ret i16 %trunc
+}
+
+define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_ext_i16_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovnsl %ecx, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i16_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movswl %si, %eax
+; X64-NEXT:    movswl %di, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovnsl %ecx, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %aext = sext i16 %a to i64
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i16
+  ret i16 %trunc
+}
+
+define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_ext_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovlel %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movslq %esi, %rax
+; X64-NEXT:    movslq %edi, %rcx
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %aext = sext i32 %a to i64
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i32
+  ret i32 %trunc
+}
+
+define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
+; X86-LABEL: abd_ext_i32_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovlel %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i32_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    movswq %si, %rax
+; X64-NEXT:    movslq %edi, %rcx
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %aext = sext i32 %a to i64
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i32
+  ret i32 %trunc
+}
+
+define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_ext_i32_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovlel %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i32_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movslq %esi, %rax
+; X64-NEXT:    movslq %edi, %rcx
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %aext = sext i32 %a to i64
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i32
+  ret i32 %trunc
+}
+
+define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_ext_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl %esi, %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    sbbl %edi, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    xorl %edx, %eax
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    sbbl %edx, %ecx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    negl %eax
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    cmovgq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    retq
+  %aext = sext i64 %a to i128
+  %bext = sext i64 %b to i128
+  %sub = sub i128 %aext, %bext
+  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
+  %nabs = sub i128 0, %abs
+  %trunc = trunc i128 %nabs to i64
+  ret i64 %trunc
+}
+
+define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_ext_i64_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl %esi, %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    sbbl %edi, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    xorl %edx, %eax
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    sbbl %edx, %ecx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    negl %eax
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i64_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    cmovgq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    retq
+  %aext = sext i64 %a to i128
+  %bext = sext i64 %b to i128
+  %sub = sub i128 %aext, %bext
+  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
+  %nabs = sub i128 0, %abs
+  %trunc = trunc i128 %nabs to i64
+  ret i64 %trunc
+}
+
+define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_ext_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    sbbl %ebx, %edi
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    xorl %edi, %eax
+; X86-NEXT:    xorl %edi, %edx
+; X86-NEXT:    xorl %edi, %esi
+; X86-NEXT:    xorl %edi, %ecx
+; X86-NEXT:    subl %edi, %ecx
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    sbbl %edi, %edx
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    movl %ebx, 4(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_ext_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    sarq $63, %r8
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    sbbq %r8, %rcx
+; X64-NEXT:    sbbq %r8, %rdi
+; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    sbbq %rdi, %rsi
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    negq %rax
+; X64-NEXT:    sbbq %rsi, %rdx
+; X64-NEXT:    retq
+  %aext = sext i128 %a to i256
+  %bext = sext i128 %b to i256
+  %sub = sub i256 %aext, %bext
+  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
+  %nabs = sub i256 0, %abs
+  %trunc = trunc i256 %nabs to i128
+  ret i128 %trunc
+}
+
+define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_ext_i128_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl %ebp, %eax
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl %edi, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    sbbl %ebx, %edi
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    xorl %edi, %eax
+; X86-NEXT:    xorl %edi, %edx
+; X86-NEXT:    xorl %edi, %esi
+; X86-NEXT:    xorl %edi, %ecx
+; X86-NEXT:    subl %edi, %ecx
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    sbbl %edi, %edx
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    movl %ebx, 4(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_ext_i128_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    sarq $63, %r8
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    sbbq %r8, %rcx
+; X64-NEXT:    sbbq %r8, %rdi
+; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    sbbq %rdi, %rsi
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    negq %rax
+; X64-NEXT:    sbbq %rsi, %rdx
+; X64-NEXT:    retq
+  %aext = sext i128 %a to i256
+  %bext = sext i128 %b to i256
+  %sub = sub i256 %aext, %bext
+  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
+  %nabs = sub i256 0, %abs
+  %trunc = trunc i256 %nabs to i128
+  ret i128 %trunc
+}
+
+;
+; sub(smin(a,b),smax(a,b)) -> nabds(a,b)
+;
+
+define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_minmax_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpb %cl, %dl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmovll %edx, %eax
+; X86-NEXT:    cmovgl %edx, %ecx
+; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_minmax_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpb %sil, %dil
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    cmovll %edi, %eax
+; X64-NEXT:    cmovgl %edi, %esi
+; X64-NEXT:    subb %sil, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
+  %max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
+  %sub = sub i8 %min, %max
+  ret i8 %sub
+}
+
+define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_minmax_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpw %cx, %dx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmovll %edx, %eax
+; X86-NEXT:    cmovgl %edx, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_minmax_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpw %si, %di
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    cmovll %edi, %eax
+; X64-NEXT:    cmovgl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
+  %max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
+  %sub = sub i16 %min, %max
+  ret i16 %sub
+}
+
+define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_minmax_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpl %ecx, %edx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmovll %edx, %eax
+; X86-NEXT:    cmovgl %edx, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_minmax_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpl %esi, %edi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    cmovll %edi, %eax
+; X64-NEXT:    cmovgl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
+  %max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
+  %sub = sub i32 %min, %max
+  ret i32 %sub
+}
+
+define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_minmax_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    cmpl %esi, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    cmovll %ebx, %edx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    cmovll %edi, %eax
+; X86-NEXT:    cmpl %edi, %esi
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    cmovll %ebx, %ecx
+; X86-NEXT:    cmovll %edi, %esi
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_minmax_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpq %rsi, %rdi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    cmovlq %rdi, %rax
+; X64-NEXT:    cmovgq %rdi, %rsi
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    retq
+  %min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
+  %max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
+  %sub = sub i64 %min, %max
+  ret i64 %sub
+}
+
+define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_minmax_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpl %eax, %esi
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    sbbl %ebp, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    sbbl %edi, %ecx
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    cmovll %edx, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    cmovll %esi, %edx
+; X86-NEXT:    cmpl %esi, %eax
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl %eax, %edx
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    sbbl %esi, %ebp
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl %esi, 12(%eax)
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_minmax_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpq %rdx, %rdi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    sbbq %rcx, %rax
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    cmovlq %rsi, %r8
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    cmovlq %rdi, %rax
+; X64-NEXT:    cmpq %rdi, %rdx
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    sbbq %rsi, %r9
+; X64-NEXT:    cmovlq %rsi, %rcx
+; X64-NEXT:    cmovlq %rdi, %rdx
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %r8
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    retq
+  %min = call i128 @llvm.smin.i128(i128 %a, i128 %b)
+  %max = call i128 @llvm.smax.i128(i128 %a, i128 %b)
+  %sub = sub i128 %min, %max
+  ret i128 %sub
+}
+
+;
+; select(icmp(a,b),sub(a,b),sub(b,a)) -> nabds(a,b)
+;
+
+define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_cmp_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subb %cl, %dl
+; X86-NEXT:    negb %dl
+; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    movzbl %dl, %eax
+; X86-NEXT:    cmovlel %ecx, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_cmp_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subb %sil, %al
+; X64-NEXT:    negb %al
+; X64-NEXT:    subb %sil, %dil
+; X64-NEXT:    movzbl %dil, %ecx
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    cmovlel %ecx, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %cmp = icmp sle i8 %a, %b
+  %ab = sub i8 %a, %b
+  %ba = sub i8 %b, %a
+  %sel = select i1 %cmp, i8 %ab, i8 %ba
+  ret i8 %sel
+}
+
+define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_cmp_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    subw %dx, %si
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmpw %dx, %cx
+; X86-NEXT:    cmovll %esi, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_cmp_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    subw %si, %cx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmpw %si, %di
+; X64-NEXT:    cmovll %ecx, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %cmp = icmp slt i16 %a, %b
+  %ab = sub i16 %a, %b
+  %ba = sub i16 %b, %a
+  %sel = select i1 %cmp, i16 %ab, i16 %ba
+  ret i16 %sel
+}
+
+define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_cmp_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovgel %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_cmp_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmovll %edi, %eax
+; X64-NEXT:    retq
+  %cmp = icmp sge i32 %a, %b
+  %ab = sub i32 %a, %b
+  %ba = sub i32 %b, %a
+  %sel = select i1 %cmp, i32 %ba, i32 %ab
+  ret i32 %sel
+}
+
+define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_cmp_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    subl %eax, %edi
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    sbbl %edx, %ebx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    cmovll %edi, %eax
+; X86-NEXT:    cmovll %ebx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_cmp_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    cmovgeq %rdi, %rax
+; X64-NEXT:    retq
+  %cmp = icmp slt i64 %a, %b
+  %ab = sub i64 %a, %b
+  %ba = sub i64 %b, %a
+  %sel = select i1 %cmp, i64 %ba, i64 %ab
+  ret i64 %sel
+}
+
+define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_cmp_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    sbbl %ecx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmovll (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    cmovll %ebx, %esi
+; X86-NEXT:    cmovll %ebp, %ecx
+; X86-NEXT:    cmovll %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_cmp_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    sbbq %rsi, %r8
+; X64-NEXT:    subq %rdx, %rdi
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    cmovgeq %rdi, %rax
+; X64-NEXT:    cmovgeq %rsi, %r8
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    retq
+  %cmp = icmp slt i128 %a, %b
+  %ab = sub i128 %a, %b
+  %ba = sub i128 %b, %a
+  %sel = select i1 %cmp, i128 %ba, i128 %ab
+  ret i128 %sel
+}
+
+;
+; nabs(sub_nsw(x, y)) -> nabds(a,b)
+;
+
+define i8 @abd_subnsw_i8(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_subnsw_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    sarb $7, %al
+; X86-NEXT:    xorb %al, %cl
+; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_subnsw_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    subb %sil, %dil
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    sarb $7, %al
+; X64-NEXT:    xorb %al, %dil
+; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    retq
+  %sub = sub nsw i8 %a, %b
+  %abs = call i8 @llvm.abs.i8(i8 %sub, i1 false)
+  %nabs = sub i8 0, %abs
+  ret i8 %nabs
+}
+
+define i8 @abd_subnsw_i8_undef(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_subnsw_i8_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    sarb $7, %al
+; X86-NEXT:    xorb %al, %cl
+; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_subnsw_i8_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    subb %sil, %dil
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    sarb $7, %al
+; X64-NEXT:    xorb %al, %dil
+; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    retq
+  %sub = sub nsw i8 %a, %b
+  %abs = call i8 @llvm.abs.i8(i8 %sub, i1 true)
+  %nabs = sub i8 0, %abs
+  ret i8 %nabs
+}
+
+define i16 @abd_subnsw_i16(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_subnsw_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subw {{[0-9]+}}(%esp), %cx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negw %ax
+; X86-NEXT:    cmovnsw %cx, %ax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_subnsw_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negw %ax
+; X64-NEXT:    cmovnsw %di, %ax
+; X64-NEXT:    retq
+  %sub = sub nsw i16 %a, %b
+  %abs = call i16 @llvm.abs.i16(i16 %sub, i1 false)
+  %nabs = sub i16 0, %abs
+  ret i16 %nabs
+}
+
+define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_subnsw_i16_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subw {{[0-9]+}}(%esp), %cx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negw %ax
+; X86-NEXT:    cmovnsw %cx, %ax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_subnsw_i16_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negw %ax
+; X64-NEXT:    cmovnsw %di, %ax
+; X64-NEXT:    retq
+  %sub = sub nsw i16 %a, %b
+  %abs = call i16 @llvm.abs.i16(i16 %sub, i1 true)
+  %nabs = sub i16 0, %abs
+  ret i16 %nabs
+}
+
+define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_subnsw_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovnsl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_subnsw_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovnsl %edi, %eax
+; X64-NEXT:    retq
+  %sub = sub nsw i32 %a, %b
+  %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
+  %nabs = sub i32 0, %abs
+  ret i32 %nabs
+}
+
+define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_subnsw_i32_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovnsl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_subnsw_i32_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovnsl %edi, %eax
+; X64-NEXT:    retq
+  %sub = sub nsw i32 %a, %b
+  %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
+  %nabs = sub i32 0, %abs
+  ret i32 %nabs
+}
+
+define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_subnsw_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    xorl %edx, %esi
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_subnsw_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovnsq %rdi, %rax
+; X64-NEXT:    retq
+  %sub = sub nsw i64 %a, %b
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  ret i64 %nabs
+}
+
+define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_subnsw_i64_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    xorl %edx, %esi
+; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_subnsw_i64_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovnsq %rdi, %rax
+; X64-NEXT:    retq
+  %sub = sub nsw i64 %a, %b
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
+  %nabs = sub i64 0, %abs
+  ret i64 %nabs
+}
+
+define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_subnsw_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    xorl %ebx, %ecx
+; X86-NEXT:    xorl %ebx, %edx
+; X86-NEXT:    xorl %ebx, %esi
+; X86-NEXT:    xorl %ebx, %edi
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    subl %edi, %ebp
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    sbbl %esi, %edi
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sbbl %ecx, %ebx
+; X86-NEXT:    movl %ebp, (%eax)
+; X86-NEXT:    movl %edi, 4(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %ebx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_subnsw_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    subq %rdx, %rdi
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    sarq $63, %rdx
+; X64-NEXT:    xorq %rdx, %rsi
+; X64-NEXT:    xorq %rdx, %rdi
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    sbbq %rsi, %rdx
+; X64-NEXT:    retq
+  %sub = sub nsw i128 %a, %b
+  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
+  %nabs = sub i128 0, %abs
+  ret i128 %nabs
+}
+
+define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_subnsw_i128_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    xorl %ebx, %ecx
+; X86-NEXT:    xorl %ebx, %edx
+; X86-NEXT:    xorl %ebx, %esi
+; X86-NEXT:    xorl %ebx, %edi
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    subl %edi, %ebp
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    sbbl %esi, %edi
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sbbl %ecx, %ebx
+; X86-NEXT:    movl %ebp, (%eax)
+; X86-NEXT:    movl %edi, 4(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %ebx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_subnsw_i128_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    subq %rdx, %rdi
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    sarq $63, %rdx
+; X64-NEXT:    xorq %rdx, %rsi
+; X64-NEXT:    xorq %rdx, %rdi
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    sbbq %rsi, %rdx
+; X64-NEXT:    retq
+  %sub = sub nsw i128 %a, %b
+  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
+  %nabs = sub i128 0, %abs
+  ret i128 %nabs
+}
+
+declare i8 @llvm.abs.i8(i8, i1)
+declare i16 @llvm.abs.i16(i16, i1)
+declare i32 @llvm.abs.i32(i32, i1)
+declare i64 @llvm.abs.i64(i64, i1)
+declare i128 @llvm.abs.i128(i128, i1)
+
+declare i8 @llvm.smax.i8(i8, i8)
+declare i16 @llvm.smax.i16(i16, i16)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i64 @llvm.smax.i64(i64, i64)
+
+declare i8 @llvm.smin.i8(i8, i8)
+declare i16 @llvm.smin.i16(i16, i16)
+declare i32 @llvm.smin.i32(i32, i32)
+declare i64 @llvm.smin.i64(i64, i64)

diff  --git a/llvm/test/CodeGen/X86/abdu-neg.ll b/llvm/test/CodeGen/X86/abdu-neg.ll
new file mode 100644
index 0000000000000..48a8575acfacf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/abdu-neg.ll
@@ -0,0 +1,958 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=X64
+
+;
+; trunc(nabs(sub(zext(a),zext(b)))) -> nabds(a,b)
+;
+
+define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_ext_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovsl %ecx, %eax
+; X86-NEXT:    negb %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    movzbl %dil, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovsl %ecx, %eax
+; X64-NEXT:    negb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %aext = zext i8 %a to i64
+  %bext = zext i8 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i8
+  ret i8 %trunc
+}
+
+define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
+; X86-LABEL: abd_ext_i8_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovsl %ecx, %eax
+; X86-NEXT:    negb %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i8_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %si, %eax
+; X64-NEXT:    movzbl %dil, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovsl %ecx, %eax
+; X64-NEXT:    negb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %aext = zext i8 %a to i64
+  %bext = zext i16 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i8
+  ret i8 %trunc
+}
+
+define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_ext_i8_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovsl %ecx, %eax
+; X86-NEXT:    negb %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i8_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    movzbl %dil, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovsl %ecx, %eax
+; X64-NEXT:    negb %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %aext = zext i8 %a to i64
+  %bext = zext i8 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i8
+  ret i8 %trunc
+}
+
+define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_ext_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovnsl %ecx, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %si, %eax
+; X64-NEXT:    movzwl %di, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovnsl %ecx, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %aext = zext i16 %a to i64
+  %bext = zext i16 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i16
+  ret i16 %trunc
+}
+
+define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
+; X86-LABEL: abd_ext_i16_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovbel %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i16_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movzwl %di, %ecx
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $rax
+; X64-NEXT:    retq
+  %aext = zext i16 %a to i64
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i16
+  ret i16 %trunc
+}
+
+define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_ext_i16_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmovnsl %ecx, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i16_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %si, %eax
+; X64-NEXT:    movzwl %di, %ecx
+; X64-NEXT:    subl %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovnsl %ecx, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %aext = zext i16 %a to i64
+  %bext = zext i16 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i16
+  ret i16 %trunc
+}
+
+define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_ext_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovbel %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %aext = zext i32 %a to i64
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i32
+  ret i32 %trunc
+}
+
+define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
+; X86-LABEL: abd_ext_i32_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovbel %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i32_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    movzwl %si, %eax
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %aext = zext i32 %a to i64
+  %bext = zext i16 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i32
+  ret i32 %trunc
+}
+
+define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_ext_i32_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovbel %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i32_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %aext = zext i32 %a to i64
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %aext, %bext
+  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
+  %nabs = sub i64 0, %abs
+  %trunc = trunc i64 %nabs to i32
+  ret i32 %trunc
+}
+
+define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_ext_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    xorl %esi, %ecx
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    sbbl %esi, %ecx
+; X86-NEXT:    negl %eax
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    cmovaq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    retq
+  %aext = zext i64 %a to i128
+  %bext = zext i64 %b to i128
+  %sub = sub i128 %aext, %bext
+  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
+  %nabs = sub i128 0, %abs
+  %trunc = trunc i128 %nabs to i64
+  ret i64 %trunc
+}
+
+define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_ext_i64_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    sbbl %esi, %esi
+; X86-NEXT:    xorl %esi, %ecx
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    sbbl %esi, %ecx
+; X86-NEXT:    negl %eax
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_ext_i64_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    cmovaq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    retq
+  %aext = zext i64 %a to i128
+  %bext = zext i64 %b to i128
+  %sub = sub i128 %aext, %bext
+  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
+  %nabs = sub i128 0, %abs
+  %trunc = trunc i128 %nabs to i64
+  ret i64 %trunc
+}
+
+define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_ext_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    xorl %ebp, %edx
+; X86-NEXT:    xorl %ebp, %edi
+; X86-NEXT:    xorl %ebp, %ebx
+; X86-NEXT:    xorl %ebp, %esi
+; X86-NEXT:    subl %ebp, %esi
+; X86-NEXT:    sbbl %ebp, %ebx
+; X86-NEXT:    sbbl %ebp, %edi
+; X86-NEXT:    sbbl %ebp, %edx
+; X86-NEXT:    negl %esi
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %edi, %ebx
+; X86-NEXT:    sbbl %edx, %ecx
+; X86-NEXT:    movl %esi, (%eax)
+; X86-NEXT:    movl %ebp, 4(%eax)
+; X86-NEXT:    movl %ebx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_ext_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    xorq %rcx, %rsi
+; X64-NEXT:    xorq %rcx, %rax
+; X64-NEXT:    subq %rcx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    negq %rax
+; X64-NEXT:    sbbq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    retq
+  %aext = zext i128 %a to i256
+  %bext = zext i128 %b to i256
+  %sub = sub i256 %aext, %bext
+  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
+  %nabs = sub i256 0, %abs
+  %trunc = trunc i256 %nabs to i128
+  ret i128 %trunc
+}
+
+define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_ext_i128_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    xorl %ebp, %edx
+; X86-NEXT:    xorl %ebp, %edi
+; X86-NEXT:    xorl %ebp, %ebx
+; X86-NEXT:    xorl %ebp, %esi
+; X86-NEXT:    subl %ebp, %esi
+; X86-NEXT:    sbbl %ebp, %ebx
+; X86-NEXT:    sbbl %ebp, %edi
+; X86-NEXT:    sbbl %ebp, %edx
+; X86-NEXT:    negl %esi
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl $0, %ebx
+; X86-NEXT:    sbbl %edi, %ebx
+; X86-NEXT:    sbbl %edx, %ecx
+; X86-NEXT:    movl %esi, (%eax)
+; X86-NEXT:    movl %ebp, 4(%eax)
+; X86-NEXT:    movl %ebx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_ext_i128_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    xorq %rcx, %rsi
+; X64-NEXT:    xorq %rcx, %rax
+; X64-NEXT:    subq %rcx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    negq %rax
+; X64-NEXT:    sbbq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rdx
+; X64-NEXT:    retq
+  %aext = zext i128 %a to i256
+  %bext = zext i128 %b to i256
+  %sub = sub i256 %aext, %bext
+  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
+  %nabs = sub i256 0, %abs
+  %trunc = trunc i256 %nabs to i128
+  ret i128 %trunc
+}
+
+;
+; sub(umin(a,b),umax(a,b)) -> nabds(a,b)
+;
+
+define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_minmax_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpb %cl, %dl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmovbl %edx, %eax
+; X86-NEXT:    cmoval %edx, %ecx
+; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_minmax_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpb %sil, %dil
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    cmovbl %edi, %eax
+; X64-NEXT:    cmoval %edi, %esi
+; X64-NEXT:    subb %sil, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
+  %max = call i8 @llvm.umax.i8(i8 %a, i8 %b)
+  %sub = sub i8 %min, %max
+  ret i8 %sub
+}
+
+define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_minmax_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpw %cx, %dx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmovbl %edx, %eax
+; X86-NEXT:    cmoval %edx, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_minmax_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpw %si, %di
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    cmovbl %edi, %eax
+; X64-NEXT:    cmoval %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
+  %max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
+  %sub = sub i16 %min, %max
+  ret i16 %sub
+}
+
+define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_minmax_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpl %ecx, %edx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmovbl %edx, %eax
+; X86-NEXT:    cmoval %edx, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_minmax_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpl %esi, %edi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    cmovbl %edi, %eax
+; X64-NEXT:    cmoval %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
+  %max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
+  %sub = sub i32 %min, %max
+  ret i32 %sub
+}
+
+define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_minmax_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    cmpl %esi, %edi
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    cmovbl %ebx, %edx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    cmovbl %edi, %eax
+; X86-NEXT:    cmpl %edi, %esi
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    cmovbl %ebx, %ecx
+; X86-NEXT:    cmovbl %edi, %esi
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_minmax_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpq %rsi, %rdi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    cmovbq %rdi, %rax
+; X64-NEXT:    cmovaq %rdi, %rsi
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    retq
+  %min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
+  %max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
+  %sub = sub i64 %min, %max
+  ret i64 %sub
+}
+
+define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_minmax_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpl %eax, %esi
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    sbbl %ebp, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    sbbl %edi, %ecx
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    cmovbl %edx, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    cmovbl %esi, %edx
+; X86-NEXT:    cmpl %esi, %eax
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl %eax, %edx
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    sbbl %esi, %ebp
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    sbbl %edi, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl %esi, 12(%eax)
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_minmax_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpq %rdx, %rdi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    sbbq %rcx, %rax
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    cmovbq %rsi, %r8
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    cmovbq %rdi, %rax
+; X64-NEXT:    cmpq %rdi, %rdx
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    sbbq %rsi, %r9
+; X64-NEXT:    cmovbq %rsi, %rcx
+; X64-NEXT:    cmovbq %rdi, %rdx
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %r8
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    retq
+  %min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
+  %max = call i128 @llvm.umax.i128(i128 %a, i128 %b)
+  %sub = sub i128 %min, %max
+  ret i128 %sub
+}
+
+;
+; select(icmp(a,b),sub(a,b),sub(b,a)) -> nabds(a,b)
+;
+
+define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
+; X86-LABEL: abd_cmp_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subb %cl, %dl
+; X86-NEXT:    negb %dl
+; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    movzbl %dl, %eax
+; X86-NEXT:    cmovbel %ecx, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_cmp_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subb %sil, %al
+; X64-NEXT:    negb %al
+; X64-NEXT:    subb %sil, %dil
+; X64-NEXT:    movzbl %dil, %ecx
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    cmovbel %ecx, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %cmp = icmp ule i8 %a, %b
+  %ab = sub i8 %a, %b
+  %ba = sub i8 %b, %a
+  %sel = select i1 %cmp, i8 %ab, i8 %ba
+  ret i8 %sel
+}
+
+define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
+; X86-LABEL: abd_cmp_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    subw %dx, %si
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    cmpw %dx, %cx
+; X86-NEXT:    cmovbl %esi, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_cmp_i16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    subw %si, %cx
+; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmpw %si, %di
+; X64-NEXT:    cmovbl %ecx, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %cmp = icmp ult i16 %a, %b
+  %ab = sub i16 %a, %b
+  %ba = sub i16 %b, %a
+  %sel = select i1 %cmp, i16 %ab, i16 %ba
+  ret i16 %sel
+}
+
+define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: abd_cmp_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmovael %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_cmp_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmovbl %edi, %eax
+; X64-NEXT:    retq
+  %cmp = icmp uge i32 %a, %b
+  %ab = sub i32 %a, %b
+  %ba = sub i32 %b, %a
+  %sel = select i1 %cmp, i32 %ba, i32 %ab
+  ret i32 %sel
+}
+
+define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: abd_cmp_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    subl %eax, %edi
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    sbbl %edx, %ebx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    cmovbl %edi, %eax
+; X86-NEXT:    cmovbl %ebx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X64-LABEL: abd_cmp_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    cmovaeq %rdi, %rax
+; X64-NEXT:    retq
+  %cmp = icmp ult i64 %a, %b
+  %ab = sub i64 %a, %b
+  %ba = sub i64 %b, %a
+  %sel = select i1 %cmp, i64 %ba, i64 %ab
+  ret i64 %sel
+}
+
+define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_cmp_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    sbbl %ecx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmovbl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    cmovbl %ebx, %esi
+; X86-NEXT:    cmovbl %ebp, %ecx
+; X86-NEXT:    cmovbl %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_cmp_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    sbbq %rsi, %r8
+; X64-NEXT:    subq %rdx, %rdi
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    cmovaeq %rdi, %rax
+; X64-NEXT:    cmovaeq %rsi, %r8
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    retq
+  %cmp = icmp ult i128 %a, %b
+  %ab = sub i128 %a, %b
+  %ba = sub i128 %b, %a
+  %sel = select i1 %cmp, i128 %ba, i128 %ab
+  ret i128 %sel
+}
+
+declare i8 @llvm.abs.i8(i8, i1)
+declare i16 @llvm.abs.i16(i16, i1)
+declare i32 @llvm.abs.i32(i32, i1)
+declare i64 @llvm.abs.i64(i64, i1)
+declare i128 @llvm.abs.i128(i128, i1)
+
+declare i8 @llvm.umax.i8(i8, i8)
+declare i16 @llvm.umax.i16(i16, i16)
+declare i32 @llvm.umax.i32(i32, i32)
+declare i64 @llvm.umax.i64(i64, i64)
+
+declare i8 @llvm.umin.i8(i8, i8)
+declare i16 @llvm.umin.i16(i16, i16)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i64 @llvm.umin.i64(i64, i64)


        


More information about the llvm-commits mailing list