[llvm] f04a859 - [X86] Add umax(x, 1) vector test coverage for Issue #61225
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 7 06:34:16 PST 2023
Author: Simon Pilgrim
Date: 2023-03-07T14:34:03Z
New Revision: f04a859e4183301b5adbce89975907fe8f30ccd3
URL: https://github.com/llvm/llvm-project/commit/f04a859e4183301b5adbce89975907fe8f30ccd3
DIFF: https://github.com/llvm/llvm-project/commit/f04a859e4183301b5adbce89975907fe8f30ccd3.diff
LOG: [X86] Add umax(x,1) vector test coverage for Issue #61225
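For context, the new tests exercise @llvm.umax with a splat-of-one second operand. A minimal sketch of the pattern (function name is illustrative; the actual test cases are in the diff below):

declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)

; umax(x, 1) clamps every unsigned lane up to at least 1.
define <4 x i32> @umax_v4i32_one(<4 x i32> %x) nounwind {
  %r = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %r
}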
Added:
Modified:
llvm/test/CodeGen/X86/umax.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll
index ce71a891f45ee..55114f12183df 100644
--- a/llvm/test/CodeGen/X86/umax.ll
+++ b/llvm/test/CodeGen/X86/umax.ll
@@ -17,6 +17,7 @@ declare <3 x i32> @llvm.umax.v3i32(<3 x i32>, <3 x i32>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
+declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
@@ -292,6 +293,195 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind {
ret i128 %r
}
+define i128 @test_i128_1(i128 %a, i128 %b) nounwind {
+; X64-LABEL: test_i128_1:
+; X64: # %bb.0:
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: cmpq $1, %rdi
+; X64-NEXT: movq %rdi, %rcx
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: testq %rsi, %rsi
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: cmovneq %rdi, %rax
+; X64-NEXT: cmoveq %rcx, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: test_i128_1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl $1, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: movl $1, %edi
+; X86-NEXT: cmovnel %eax, %edi
+; X86-NEXT: cmovel %ebx, %edi
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: negl %ebp
+; X86-NEXT: movl $0, %ebp
+; X86-NEXT: sbbl %edx, %ebp
+; X86-NEXT: movl $1, %ebp
+; X86-NEXT: cmovbl %eax, %ebp
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cmovbl %esi, %ebx
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: cmovel %edi, %ebp
+; X86-NEXT: cmovel %esi, %ebx
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %r = call i128 @llvm.umax.i128(i128 %a, i128 1)
+ ret i128 %r
+}
+
+define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE-LABEL: test_v2i64:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: pxor %xmm2, %xmm3
+; SSE-NEXT: pxor %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm4
+; SSE-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE-NEXT: pand %xmm5, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE-NEXT: por %xmm2, %xmm3
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: pandn %xmm1, %xmm3
+; SSE-NEXT: por %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; X86-LABEL: test_v2i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: cmoval %eax, %ebp
+; X86-NEXT: cmpl %edi, %ebx
+; X86-NEXT: cmoval %eax, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmovel %ebp, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: cmoval %ebx, %edi
+; X86-NEXT: cmpl %ecx, %eax
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: cmoval %eax, %ebx
+; X86-NEXT: cmpl %esi, %ebp
+; X86-NEXT: cmoval %eax, %ecx
+; X86-NEXT: cmovel %ebx, %ecx
+; X86-NEXT: cmoval %ebp, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %r = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %r
+}
+
+define <2 x i64> @test_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE-LABEL: test_v2i64_1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT: pand %xmm3, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT: por %xmm2, %xmm1
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_v2i64_1:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vmovapd {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
+; AVX-NEXT: retq
+;
+; X86-LABEL: test_v2i64_1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: movl $1, %ebp
+; X86-NEXT: cmovel %ebp, %ecx
+; X86-NEXT: cmovel %ebx, %ecx
+; X86-NEXT: cmpl $1, %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: cmovnel %edi, %ebp
+; X86-NEXT: cmovel %ebx, %ebp
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %ebp, 8(%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %r = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %r
+}
+
define <1 x i32> @test_v1i32(<1 x i32> %a, <1 x i32> %b) nounwind {
; X64-LABEL: test_v1i32:
; X64: # %bb.0:
@@ -439,6 +629,57 @@ define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
ret <4 x i32> %r
}
+define <4 x i32> @test_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: test_v4i32_1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm0, %xmm1
+; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: psubd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test_v4i32_1:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v4i32_1:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; X86-LABEL: test_v4i32_1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: cmpl $1, %esi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: cmpl $1, %edi
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl $4
+ %r = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ ret <4 x i32> %r
+}
+
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; SSE-LABEL: test_v8i32:
; SSE: # %bb.0:
@@ -537,6 +778,95 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
ret <8 x i32> %r
}
+define <8 x i32> @test_v8i32_1(<8 x i32> %a, <8 x i32> %b) nounwind {
+; SSE-LABEL: test_v8i32_1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: pxor %xmm2, %xmm3
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [2147483649,2147483649,2147483649,2147483649]
+; SSE-NEXT: pcmpgtd %xmm4, %xmm3
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: paddd %xmm3, %xmm0
+; SSE-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE-NEXT: psubd %xmm3, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pcmpgtd %xmm4, %xmm2
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: paddd %xmm2, %xmm1
+; SSE-NEXT: psubd %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test_v8i32_1:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
+; AVX1-NEXT: vpmaxud %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v8i32_1:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; X86-LABEL: test_v8i32_1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $1, %eax
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: cmpl $1, %esi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: cmpl $1, %edi
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: cmpl $1, %ebx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: cmpl $1, %ebp
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $1, %eax
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, 28(%ecx)
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 24(%ecx)
+; X86-NEXT: movl %ebp, 20(%ecx)
+; X86-NEXT: movl %ebx, 16(%ecx)
+; X86-NEXT: movl %edi, 12(%ecx)
+; X86-NEXT: movl %esi, 8(%ecx)
+; X86-NEXT: movl %edx, 4(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, (%ecx)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %r = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
+ ret <8 x i32> %r
+}
+
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: test_v8i16:
; SSE: # %bb.0:
@@ -612,6 +942,74 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
ret <8 x i16> %r
}
+define <8 x i16> @test_v8i16_1(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: test_v8i16_1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
+; SSE-NEXT: psubusw %xmm0, %xmm1
+; SSE-NEXT: paddw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_v8i16_1:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X86-LABEL: test_v8i16_1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpw $1, %dx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: cmpw $1, %bp
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: cmpw $1, %bx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: cmpw $1, %di
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: cmpw $1, %si
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: cmpw $1, %cx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpw $1, %ax
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpw $1, %ax
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movw %ax, 14(%ecx)
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: movw %ax, 12(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movw %ax, 10(%ecx)
+; X86-NEXT: movw %si, 8(%ecx)
+; X86-NEXT: movw %di, 6(%ecx)
+; X86-NEXT: movw %bx, 4(%ecx)
+; X86-NEXT: movw %bp, 2(%ecx)
+; X86-NEXT: movw %dx, (%ecx)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %r = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %r
+}
+
define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE-LABEL: test_v16i8:
; SSE: # %bb.0:
@@ -747,6 +1145,125 @@ define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
ret <16 x i8> %r
}
+define <16 x i8> @test_v16i8_1(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: test_v16i8_1:
+; SSE: # %bb.0:
+; SSE-NEXT: pmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_v16i8_1:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X86-LABEL: test_v16i8_1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $40, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpb $1, %bl
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb $1, %dl
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpb $1, %bl
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpb $1, %dl
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb %cl, 15(%eax)
+; X86-NEXT: movb %dl, 14(%eax)
+; X86-NEXT: movb %bl, 13(%eax)
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: movb %cl, 12(%eax)
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movb %cl, 11(%eax)
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movb %cl, 10(%eax)
+; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 9(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 8(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 7(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 6(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 5(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 3(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 2(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, 1(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movb %cl, (%eax)
+; X86-NEXT: addl $40, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %r = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <16 x i8> %r
+}
+
define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
; X64-LABEL: test_signbits_i16:
; X64: # %bb.0: