[llvm] r265925 - [X86] Added widening tests for and/xor/or bit operations
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 11 03:16:28 PDT 2016
Author: rksimon
Date: Mon Apr 11 05:16:27 2016
New Revision: 265925
URL: http://llvm.org/viewvc/llvm-project?rev=265925&view=rev
Log:
[X86] Added widening tests for and/xor/or bit operations
Part of the additional tests requested for D18944.
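
For anyone wanting to reproduce the output by hand, the RUN lines in the test below expand to something like the following (a sketch assuming a built llc and FileCheck on PATH and run from llvm/trunk/test/CodeGen/X86; the CHECK lines themselves were autogenerated with utils/update_llc_test_checks.py, as noted at the top of the file):

    llc < widen_bitops-0.ll -mtriple=x86_64-unknown -mattr=+sse4.2 \
        | FileCheck widen_bitops-0.ll --check-prefix=X64-SSE --check-prefix=X64-SSE42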
Added:
llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll
Added: llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll?rev=265925&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll (added)
+++ llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll Mon Apr 11 05:16:27 2016
@@ -0,0 +1,663 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42
+
+define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
+; X32-SSE-LABEL: and_i24_as_v3i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: subl $12, %esp
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pand %xmm0, %xmm1
+; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: and_i24_as_v3i8:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: movd %esi, %xmm0
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-SSE-NEXT: pand %xmm0, %xmm1
+; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: retq
+ %1 = bitcast i24 %a to <3 x i8>
+ %2 = bitcast i24 %b to <3 x i8>
+ %3 = and <3 x i8> %1, %2
+ %4 = bitcast <3 x i8> %3 to i24
+ ret i24 %4
+}
+
+define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind {
+; X32-SSE-LABEL: xor_i24_as_v3i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: subl $12, %esp
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pxor %xmm0, %xmm1
+; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: xor_i24_as_v3i8:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: movd %esi, %xmm0
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-SSE-NEXT: pxor %xmm0, %xmm1
+; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: retq
+ %1 = bitcast i24 %a to <3 x i8>
+ %2 = bitcast i24 %b to <3 x i8>
+ %3 = xor <3 x i8> %1, %2
+ %4 = bitcast <3 x i8> %3 to i24
+ ret i24 %4
+}
+
+define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind {
+; X32-SSE-LABEL: or_i24_as_v3i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: subl $12, %esp
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: por %xmm0, %xmm1
+; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: or_i24_as_v3i8:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: movd %esi, %xmm0
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-SSE-NEXT: por %xmm0, %xmm1
+; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: retq
+ %1 = bitcast i24 %a to <3 x i8>
+ %2 = bitcast i24 %b to <3 x i8>
+ %3 = or <3 x i8> %1, %2
+ %4 = bitcast <3 x i8> %3 to i24
+ ret i24 %4
+}
+
+define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
+; X32-SSE-LABEL: and_v8i3_as_i24:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pushl %ebp
+; X32-SSE-NEXT: movl %esp, %ebp
+; X32-SSE-NEXT: andl $-8, %esp
+; X32-SSE-NEXT: subl $24, %esp
+; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movd %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: shll $16, %eax
+; X32-SSE-NEXT: movzwl (%esp), %ecx
+; X32-SSE-NEXT: orl %eax, %ecx
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X32-SSE-NEXT: shll $16, %edx
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl %edx, %eax
+; X32-SSE-NEXT: andl %ecx, %eax
+; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: shrl $16, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $3, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: movl %eax, %edx
+; X32-SSE-NEXT: andl $7, %edx
+; X32-SSE-NEXT: movd %edx, %xmm1
+; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $6, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $9, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $12, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
+; X32-SSE-NEXT: shrl $15, %eax
+; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
+; X32-SSE-NEXT: pxor %xmm0, %xmm0
+; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; X32-SSE-NEXT: movl %ebp, %esp
+; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: and_v8i3_as_i24:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE-NEXT: orl %eax, %ecx
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
+; X64-SSE-NEXT: orl %eax, %edx
+; X64-SSE-NEXT: andl %ecx, %edx
+; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: shrl $16, %edx
+; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $3, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: movl %eax, %edx
+; X64-SSE-NEXT: andl $7, %edx
+; X64-SSE-NEXT: movd %edx, %xmm0
+; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $6, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $9, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $12, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
+; X64-SSE-NEXT: shrl $15, %eax
+; X64-SSE-NEXT: movzwl %ax, %eax
+; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
+; X64-SSE-NEXT: xorl %eax, %eax
+; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
+; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: retq
+ %1 = bitcast <8 x i3> %a to i24
+ %2 = bitcast <8 x i3> %b to i24
+ %3 = and i24 %1, %2
+ %4 = bitcast i24 %3 to <8 x i3>
+ ret <8 x i3> %4
+}
+
+define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
+; X32-SSE-LABEL: xor_v8i3_as_i24:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pushl %ebp
+; X32-SSE-NEXT: movl %esp, %ebp
+; X32-SSE-NEXT: andl $-8, %esp
+; X32-SSE-NEXT: subl $24, %esp
+; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movd %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: shll $16, %eax
+; X32-SSE-NEXT: movzwl (%esp), %ecx
+; X32-SSE-NEXT: orl %eax, %ecx
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X32-SSE-NEXT: shll $16, %edx
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl %edx, %eax
+; X32-SSE-NEXT: xorl %ecx, %eax
+; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: shrl $16, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $3, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: movl %eax, %edx
+; X32-SSE-NEXT: andl $7, %edx
+; X32-SSE-NEXT: movd %edx, %xmm1
+; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $6, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $9, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $12, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
+; X32-SSE-NEXT: shrl $15, %eax
+; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
+; X32-SSE-NEXT: pxor %xmm0, %xmm0
+; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; X32-SSE-NEXT: movl %ebp, %esp
+; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: xor_v8i3_as_i24:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE-NEXT: orl %eax, %ecx
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
+; X64-SSE-NEXT: orl %eax, %edx
+; X64-SSE-NEXT: xorl %ecx, %edx
+; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: shrl $16, %edx
+; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $3, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: movl %eax, %edx
+; X64-SSE-NEXT: andl $7, %edx
+; X64-SSE-NEXT: movd %edx, %xmm0
+; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $6, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $9, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $12, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
+; X64-SSE-NEXT: shrl $15, %eax
+; X64-SSE-NEXT: movzwl %ax, %eax
+; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
+; X64-SSE-NEXT: xorl %eax, %eax
+; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
+; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: retq
+ %1 = bitcast <8 x i3> %a to i24
+ %2 = bitcast <8 x i3> %b to i24
+ %3 = xor i24 %1, %2
+ %4 = bitcast i24 %3 to <8 x i3>
+ ret <8 x i3> %4
+}
+
+define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
+; X32-SSE-LABEL: or_v8i3_as_i24:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pushl %ebp
+; X32-SSE-NEXT: movl %esp, %ebp
+; X32-SSE-NEXT: andl $-8, %esp
+; X32-SSE-NEXT: subl $24, %esp
+; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movd %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: shll $16, %eax
+; X32-SSE-NEXT: movzwl (%esp), %ecx
+; X32-SSE-NEXT: orl %eax, %ecx
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X32-SSE-NEXT: shll $16, %edx
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl %edx, %eax
+; X32-SSE-NEXT: orl %ecx, %eax
+; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: shrl $16, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $3, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: movl %eax, %edx
+; X32-SSE-NEXT: andl $7, %edx
+; X32-SSE-NEXT: movd %edx, %xmm1
+; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $6, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $9, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $12, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
+; X32-SSE-NEXT: shrl $15, %eax
+; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
+; X32-SSE-NEXT: pxor %xmm0, %xmm0
+; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; X32-SSE-NEXT: movl %ebp, %esp
+; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: or_v8i3_as_i24:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE-NEXT: orl %eax, %ecx
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
+; X64-SSE-NEXT: orl %eax, %edx
+; X64-SSE-NEXT: orl %ecx, %edx
+; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: shrl $16, %edx
+; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $3, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: movl %eax, %edx
+; X64-SSE-NEXT: andl $7, %edx
+; X64-SSE-NEXT: movd %edx, %xmm0
+; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $6, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $9, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $12, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
+; X64-SSE-NEXT: shrl $15, %eax
+; X64-SSE-NEXT: movzwl %ax, %eax
+; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
+; X64-SSE-NEXT: xorl %eax, %eax
+; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
+; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: retq
+ %1 = bitcast <8 x i3> %a to i24
+ %2 = bitcast <8 x i3> %b to i24
+ %3 = or i24 %1, %2
+ %4 = bitcast i24 %3 to <8 x i3>
+ ret <8 x i3> %4
+}