[llvm] 23d6fc3 - [X86] Add i128 abds/abdu test coverage - based off #100810

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 5 05:53:01 PDT 2024


Author: Simon Pilgrim
Date: 2024-08-05T13:43:38+01:00
New Revision: 23d6fc37170a8c1bb285cfae16fadefc49c6cdb1

URL: https://github.com/llvm/llvm-project/commit/23d6fc37170a8c1bb285cfae16fadefc49c6cdb1
DIFF: https://github.com/llvm/llvm-project/commit/23d6fc37170a8c1bb285cfae16fadefc49c6cdb1.diff

LOG: [X86] Add i128 abds/abdu test coverage - based off #100810

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/abds.ll
    llvm/test/CodeGen/X86/abdu.ll

Removed: 
    


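For reference, the abds.ll additions below exercise the IR patterns that map to a signed absolute difference (abds): sign-extend both operands, subtract, take llvm.abs, and truncate back, plus the equivalent smax/smin, select(icmp) and abs(sub nsw) forms. A minimal standalone sketch of the two most common forms follows (illustrative only; the function names are hypothetical and not part of this commit):

declare i256 @llvm.abs.i256(i256, i1)
declare i128 @llvm.smax.i128(i128, i128)
declare i128 @llvm.smin.i128(i128, i128)

; sign-extend both operands, subtract, take the absolute value, truncate back
define i128 @abds_via_ext(i128 %a, i128 %b) {
  %aext = sext i128 %a to i256
  %bext = sext i128 %b to i256
  %sub  = sub i256 %aext, %bext
  %abs  = call i256 @llvm.abs.i256(i256 %sub, i1 false)
  %res  = trunc i256 %abs to i128
  ret i128 %res
}

; equivalent smax/smin form
define i128 @abds_via_minmax(i128 %a, i128 %b) {
  %max = call i128 @llvm.smax.i128(i128 %a, i128 %b)
  %min = call i128 @llvm.smin.i128(i128 %a, i128 %b)
  %res = sub i128 %max, %min
  ret i128 %res
}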
################################################################################
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index a80339427984a..918079786a636 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -363,6 +363,154 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
   ret i64 %trunc
 }
 
+define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_ext_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl %ebp, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    xorl %eax, %esi
+; X86-NEXT:    xorl %eax, %edi
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    xorl %eax, %edx
+; X86-NEXT:    subl %eax, %edx
+; X86-NEXT:    sbbl %eax, %ecx
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    sbbl %eax, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %edi, 8(%eax)
+; X86-NEXT:    movl %esi, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_ext_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    sarq $63, %r8
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    sbbq %r8, %rcx
+; X64-NEXT:    sbbq %r8, %rdi
+; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    sbbq %rdi, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    retq
+  %aext = sext i128 %a to i256
+  %bext = sext i128 %b to i256
+  %sub = sub i256 %aext, %bext
+  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
+  %trunc = trunc i256 %abs to i128
+  ret i128 %trunc
+}
+
+define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_ext_i128_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl %ebp, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    sbbl %ebx, %ebp
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    xorl %eax, %esi
+; X86-NEXT:    xorl %eax, %edi
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    xorl %eax, %edx
+; X86-NEXT:    subl %eax, %edx
+; X86-NEXT:    sbbl %eax, %ecx
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    sbbl %eax, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %edi, 8(%eax)
+; X86-NEXT:    movl %esi, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_ext_i128_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    sarq $63, %r8
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rdi, %rcx
+; X64-NEXT:    sbbq %r8, %rcx
+; X64-NEXT:    sbbq %r8, %rdi
+; X64-NEXT:    sarq $63, %rdi
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    sbbq %rdi, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    retq
+  %aext = sext i128 %a to i256
+  %bext = sext i128 %b to i256
+  %sub = sub i256 %aext, %bext
+  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
+  %trunc = trunc i256 %abs to i128
+  ret i128 %trunc
+}
+
 ;
 ; sub(smax(a,b),smin(a,b)) -> abds(a,b)
 ;
@@ -496,6 +644,91 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
   ret i64 %sub
 }
 
+define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_minmax_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    cmovll %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmovll %esi, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmovll %eax, %ebp
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovll %esi, %eax
+; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    subl %eax, %edx
+; X86-NEXT:    sbbl %ebp, %ebx
+; X86-NEXT:    sbbl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ebx, 4(%eax)
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_minmax_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    cmpq %rdx, %rdi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    sbbq %rcx, %rdx
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    cmovlq %rsi, %rdx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    cmovlq %rdi, %r8
+; X64-NEXT:    cmpq %rdi, %rax
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    sbbq %rsi, %r9
+; X64-NEXT:    cmovlq %rsi, %rcx
+; X64-NEXT:    cmovlq %rdi, %rax
+; X64-NEXT:    subq %r8, %rax
+; X64-NEXT:    sbbq %rdx, %rcx
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    retq
+  %min = call i128 @llvm.smin.i128(i128 %a, i128 %b)
+  %max = call i128 @llvm.smax.i128(i128 %a, i128 %b)
+  %sub = sub i128 %max, %min
+  ret i128 %sub
+}
+
 ;
 ; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b)
 ;
@@ -631,6 +864,66 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
   ret i64 %sel
 }
 
+define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_cmp_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    sbbl %ecx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmovgel (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    cmovgel %ebx, %esi
+; X86-NEXT:    cmovgel %ebp, %ecx
+; X86-NEXT:    cmovgel %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_cmp_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    sbbq %rsi, %r8
+; X64-NEXT:    subq %rdx, %rdi
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    cmovlq %rdi, %rax
+; X64-NEXT:    cmovlq %rsi, %r8
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    retq
+  %cmp = icmp sge i128 %a, %b
+  %ab = sub i128 %a, %b
+  %ba = sub i128 %b, %a
+  %sel = select i1 %cmp, i128 %ba, i128 %ab
+  ret i128 %sel
+}
+
 ;
 ; abs(sub_nsw(x, y)) -> abds(a,b)
 ;
@@ -829,6 +1122,110 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
   ret i64 %abs
 }
 
+define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_subnsw_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    xorl %ebx, %ecx
+; X86-NEXT:    xorl %ebx, %edx
+; X86-NEXT:    xorl %ebx, %esi
+; X86-NEXT:    xorl %ebx, %edi
+; X86-NEXT:    subl %ebx, %edi
+; X86-NEXT:    sbbl %ebx, %esi
+; X86-NEXT:    sbbl %ebx, %edx
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_subnsw_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    sarq $63, %rcx
+; X64-NEXT:    xorq %rcx, %rsi
+; X64-NEXT:    xorq %rcx, %rax
+; X64-NEXT:    subq %rcx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    retq
+  %sub = sub nsw i128 %a, %b
+  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
+  ret i128 %abs
+}
+
+define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_subnsw_i128_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    sarl $31, %ebx
+; X86-NEXT:    xorl %ebx, %ecx
+; X86-NEXT:    xorl %ebx, %edx
+; X86-NEXT:    xorl %ebx, %esi
+; X86-NEXT:    xorl %ebx, %edi
+; X86-NEXT:    subl %ebx, %edi
+; X86-NEXT:    sbbl %ebx, %esi
+; X86-NEXT:    sbbl %ebx, %edx
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_subnsw_i128_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rsi, %rcx
+; X64-NEXT:    sarq $63, %rcx
+; X64-NEXT:    xorq %rcx, %rsi
+; X64-NEXT:    xorq %rcx, %rax
+; X64-NEXT:    subq %rcx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    retq
+  %sub = sub nsw i128 %a, %b
+  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
+  ret i128 %abs
+}
+
 ;
 ; negative tests
 ;

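The abdu.ll changes below add the matching unsigned coverage: zero-extension instead of sign-extension, and umax/umin instead of smax/smin. Again as an illustrative sketch with hypothetical function names:

declare i256 @llvm.abs.i256(i256, i1)
declare i128 @llvm.umax.i128(i128, i128)
declare i128 @llvm.umin.i128(i128, i128)

; zero-extend both operands, subtract, take the absolute value, truncate back
define i128 @abdu_via_ext(i128 %a, i128 %b) {
  %aext = zext i128 %a to i256
  %bext = zext i128 %b to i256
  %sub  = sub i256 %aext, %bext
  %abs  = call i256 @llvm.abs.i256(i256 %sub, i1 false)
  %res  = trunc i256 %abs to i128
  ret i128 %res
}

; equivalent umax/umin form
define i128 @abdu_via_minmax(i128 %a, i128 %b) {
  %max = call i128 @llvm.umax.i128(i128 %a, i128 %b)
  %min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
  %res = sub i128 %max, %min
  ret i128 %res
}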
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index d1f07b9eaadcb..8768170702360 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -347,6 +347,136 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
   ret i64 %trunc
 }
 
+define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_ext_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    xorl %ebx, %ecx
+; X86-NEXT:    xorl %ebx, %edx
+; X86-NEXT:    xorl %ebx, %esi
+; X86-NEXT:    xorl %ebx, %edi
+; X86-NEXT:    subl %ebx, %edi
+; X86-NEXT:    sbbl %ebx, %esi
+; X86-NEXT:    sbbl %ebx, %edx
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_ext_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    sbbq %rdi, %rdi
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    sbbq %rdi, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    retq
+  %aext = zext i128 %a to i256
+  %bext = zext i128 %b to i256
+  %sub = sub i256 %aext, %bext
+  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
+  %trunc = trunc i256 %abs to i128
+  ret i128 %trunc
+}
+
+define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_ext_i128_undef:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    movl $0, %ebp
+; X86-NEXT:    sbbl %ebp, %ebp
+; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    xorl %ebx, %ecx
+; X86-NEXT:    xorl %ebx, %edx
+; X86-NEXT:    xorl %ebx, %esi
+; X86-NEXT:    xorl %ebx, %edi
+; X86-NEXT:    subl %ebx, %edi
+; X86-NEXT:    sbbl %ebx, %esi
+; X86-NEXT:    sbbl %ebx, %edx
+; X86-NEXT:    sbbl %ebx, %ecx
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_ext_i128_undef:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    subq %rdx, %rax
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    movl $0, %ecx
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    sbbq %rdi, %rdi
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    sbbq %rdi, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    retq
+  %aext = zext i128 %a to i256
+  %bext = zext i128 %b to i256
+  %sub = sub i256 %aext, %bext
+  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
+  %trunc = trunc i256 %abs to i128
+  ret i128 %trunc
+}
+
 ;
 ; sub(umax(a,b),umin(a,b)) -> abdu(a,b)
 ;
@@ -480,6 +610,91 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
   ret i64 %sub
 }
 
+define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_minmax_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    sbbl %ebx, %eax
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    sbbl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    cmovbl %ebp, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    cmovbl %esi, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmovbl %eax, %ebp
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovbl %esi, %eax
+; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    cmovbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    subl %eax, %edx
+; X86-NEXT:    sbbl %ebp, %ebx
+; X86-NEXT:    sbbl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %ebx, 4(%eax)
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_minmax_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    cmpq %rdx, %rdi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    sbbq %rcx, %rdx
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    cmovbq %rsi, %rdx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    cmovbq %rdi, %r8
+; X64-NEXT:    cmpq %rdi, %rax
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    sbbq %rsi, %r9
+; X64-NEXT:    cmovbq %rsi, %rcx
+; X64-NEXT:    cmovbq %rdi, %rax
+; X64-NEXT:    subq %r8, %rax
+; X64-NEXT:    sbbq %rdx, %rcx
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    retq
+  %min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
+  %max = call i128 @llvm.umax.i128(i128 %a, i128 %b)
+  %sub = sub i128 %max, %min
+  ret i128 %sub
+}
+
 ;
 ; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
 ;
@@ -615,6 +830,66 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
   ret i64 %sel
 }
 
+define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
+; X86-LABEL: abd_cmp_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    sbbl %ecx, %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl %edi, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    cmovael (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT:    cmovael %ebx, %esi
+; X86-NEXT:    cmovael %ebp, %ecx
+; X86-NEXT:    cmovael %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: abd_cmp_i128:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    sbbq %rsi, %r8
+; X64-NEXT:    subq %rdx, %rdi
+; X64-NEXT:    sbbq %rcx, %rsi
+; X64-NEXT:    cmovbq %rdi, %rax
+; X64-NEXT:    cmovbq %rsi, %r8
+; X64-NEXT:    movq %r8, %rdx
+; X64-NEXT:    retq
+  %cmp = icmp uge i128 %a, %b
+  %ab = sub i128 %a, %b
+  %ba = sub i128 %b, %a
+  %sel = select i1 %cmp, i128 %ba, i128 %ab
+  ret i128 %sel
+}
+
 declare i8 @llvm.abs.i8(i8, i1)
 declare i16 @llvm.abs.i16(i16, i1)
 declare i32 @llvm.abs.i32(i32, i1)


        

