[llvm] r265998 - [DAGCombiner] Fold xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A, B)) anytime before LegalizeVectorOprs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 11 14:10:33 PDT 2016
Author: rksimon
Date: Mon Apr 11 16:10:33 2016
New Revision: 265998
URL: http://llvm.org/viewvc/llvm-project?rev=265998&view=rev
Log:
[DAGCombiner] Fold xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) anytime before LegalizeVectorOprs
The xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) fold was previously only being combined at the AfterLegalizeTypes stage; this patch permits the combine to occur anytime before then as well.
The main aim of this is to improve the ability to recognise bitmasks that can be converted to shuffles.
I had to modify a number of AVX512 mask tests, as the basic bitcast to/from scalar pattern was being stripped out, which prevented the mask bitops from being tested. By replacing the bitcasts with loads we can get almost the same result.
Differential Revision: http://reviews.llvm.org/D18944
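
For illustration, here is a reduced sketch of the kind of bitmask pattern this affects, modelled on the mask_v4f32_0127 test updated below (the function name @mask_blend and the complementary mask constant are made up for this example, not taken from the patch):

define <4 x float> @mask_blend(<4 x float> %a, <4 x float> %b) {
  %1 = bitcast <4 x float> %a to <2 x i64>
  %2 = bitcast <4 x float> %b to <2 x i64>
  %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>   ; keep only lane 3 of %a
  %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>   ; keep lanes 0-2 of %b
  %5 = or <2 x i64> %3, %4
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ret <4 x float> %6
}

With the combine allowed to run earlier, patterns like this can now be recognised as a lane blend (a single blendps on SSE4.1/AVX, or a shufps pair on older SSE) instead of the previous andps+andps+orps sequence, as the vector-shuffle-128-v4.ll changes below show.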
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-select.ll
llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll
llvm/trunk/test/CodeGen/X86/widen_bitops-1.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 11 16:10:33 2016
@@ -2765,7 +2765,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSa
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
- // Only perform this optimization after type legalization and before
+ // Only perform this optimization up until type legalization, before
// LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
// adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
// we don't want to undo this promotion.
@@ -2773,7 +2773,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSa
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST ||
N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
- Level == AfterLegalizeTypes) {
+ Level <= AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Mon Apr 11 16:10:33 2016
@@ -77,15 +77,33 @@ define void @mask8_mem(i8* %ptr) {
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: andl %esi, %edi
+; CHECK-NEXT: orl %eax, %edi
+; CHECK-NEXT: movw %di, %ax
+; CHECK-NEXT: retq
+ %ma = bitcast i16 %x to <16 x i1>
+ %mb = bitcast i16 %y to <16 x i1>
+ %mc = and <16 x i1> %ma, %mb
+ %md = xor <16 x i1> %ma, %mb
+ %me = or <16 x i1> %mc, %md
+ %ret = bitcast <16 x i1> %me to i16
+ ret i16 %ret
+}
+
+define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: mand16_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw (%rdi), %k0
+; CHECK-NEXT: kmovw (%rsi), %k1
; CHECK-NEXT: kandw %k1, %k0, %k2
; CHECK-NEXT: kxorw %k1, %k0, %k0
; CHECK-NEXT: korw %k0, %k2, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
- %ma = bitcast i16 %x to <16 x i1>
- %mb = bitcast i16 %y to <16 x i1>
+ %ma = load <16 x i1>, <16 x i1>* %x
+ %mb = load <16 x i1>, <16 x i1>* %y
%mc = and <16 x i1> %ma, %mb
%md = xor <16 x i1> %ma, %mb
%me = or <16 x i1> %mc, %md
@@ -265,13 +283,13 @@ define <16 x i8> @test8(<16 x i32>%a, <1
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: jg LBB14_1
+; KNL-NEXT: jg LBB15_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
-; KNL-NEXT: jmp LBB14_3
-; KNL-NEXT: LBB14_1:
+; KNL-NEXT: jmp LBB15_3
+; KNL-NEXT: LBB15_1:
; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
-; KNL-NEXT: LBB14_3:
+; KNL-NEXT: LBB15_3:
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
@@ -280,12 +298,12 @@ define <16 x i8> @test8(<16 x i32>%a, <1
; SKX: ## BB#0:
; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: jg LBB14_1
+; SKX-NEXT: jg LBB15_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
-; SKX-NEXT: LBB14_1:
+; SKX-NEXT: LBB15_1:
; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
@@ -300,13 +318,13 @@ define <16 x i1> @test9(<16 x i1>%a, <16
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: jg LBB15_1
+; KNL-NEXT: jg LBB16_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
-; KNL-NEXT: jmp LBB15_3
-; KNL-NEXT: LBB15_1:
+; KNL-NEXT: jmp LBB16_3
+; KNL-NEXT: LBB16_1:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: LBB15_3:
+; KNL-NEXT: LBB16_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
@@ -316,13 +334,13 @@ define <16 x i1> @test9(<16 x i1>%a, <16
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: jg LBB15_1
+; SKX-NEXT: jg LBB16_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
-; SKX-NEXT: jmp LBB15_3
-; SKX-NEXT: LBB15_1:
+; SKX-NEXT: jmp LBB16_3
+; SKX-NEXT: LBB16_1:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
-; SKX-NEXT: LBB15_3:
+; SKX-NEXT: LBB16_3:
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
@@ -339,22 +357,22 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x
; KNL-LABEL: test11:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: jg LBB17_2
+; KNL-NEXT: jg LBB18_2
; KNL-NEXT: ## BB#1:
; KNL-NEXT: vmovaps %zmm1, %zmm0
-; KNL-NEXT: LBB17_2:
+; KNL-NEXT: LBB18_2:
; KNL-NEXT: retq
;
; SKX-LABEL: test11:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: jg LBB17_1
+; SKX-NEXT: jg LBB18_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpslld $31, %xmm1, %xmm0
-; SKX-NEXT: jmp LBB17_3
-; SKX-NEXT: LBB17_1:
+; SKX-NEXT: jmp LBB18_3
+; SKX-NEXT: LBB18_1:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: LBB17_3:
+; SKX-NEXT: LBB18_3:
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
@@ -794,11 +812,11 @@ define void @ktest_1(<8 x double> %in, d
; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
-; KNL-NEXT: je LBB38_2
+; KNL-NEXT: je LBB39_2
; KNL-NEXT: ## BB#1: ## %L1
; KNL-NEXT: vmovapd %zmm0, (%rdi)
; KNL-NEXT: retq
-; KNL-NEXT: LBB38_2: ## %L2
+; KNL-NEXT: LBB39_2: ## %L2
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
; KNL-NEXT: retq
;
@@ -809,11 +827,11 @@ define void @ktest_1(<8 x double> %in, d
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; SKX-NEXT: ktestb %k0, %k0
-; SKX-NEXT: je LBB38_2
+; SKX-NEXT: je LBB39_2
; SKX-NEXT: ## BB#1: ## %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi)
; SKX-NEXT: retq
-; SKX-NEXT: LBB38_2: ## %L2
+; SKX-NEXT: LBB39_2: ## %L2
; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
; SKX-NEXT: retq
%addr1 = getelementptr double, double * %base, i64 0
@@ -859,12 +877,12 @@ define void @ktest_2(<32 x float> %in, f
; SKX-NEXT: kunpckwd %k1, %k2, %k1
; SKX-NEXT: kord %k1, %k0, %k0
; SKX-NEXT: ktestd %k0, %k0
-; SKX-NEXT: je LBB39_2
+; SKX-NEXT: je LBB40_2
; SKX-NEXT: ## BB#1: ## %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi)
; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
; SKX-NEXT: retq
-; SKX-NEXT: LBB39_2: ## %L2
+; SKX-NEXT: LBB40_2: ## %L2
; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
; SKX-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-select.ll?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll Mon Apr 11 16:10:33 2016
@@ -71,10 +71,8 @@ define <16 x double> @select04(<16 x dou
define i8 @select05(i8 %a.0, i8 %m) {
; CHECK-LABEL: select05:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k0
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: korw %k1, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: orl %esi, %edi
+; CHECK-NEXT: movb %dil, %al
; CHECK-NEXT: retq
%mask = bitcast i8 %m to <8 x i1>
%a = bitcast i8 %a.0 to <8 x i1>
@@ -83,13 +81,28 @@ define i8 @select05(i8 %a.0, i8 %m) {
ret i8 %res;
}
+define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
+; CHECK-LABEL: select05_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbw (%rsi), %ax
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: movzbw (%rdi), %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %mask = load <8 x i1> , <8 x i1>* %m
+ %a = load <8 x i1> , <8 x i1>* %a.0
+ %r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
+ %res = bitcast <8 x i1> %r to i8
+ ret i8 %res;
+}
+
define i8 @select06(i8 %a.0, i8 %m) {
; CHECK-LABEL: select06:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %esi, %k0
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: kandw %k1, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl %esi, %edi
+; CHECK-NEXT: movb %dil, %al
; CHECK-NEXT: retq
%mask = bitcast i8 %m to <8 x i1>
%a = bitcast i8 %a.0 to <8 x i1>
@@ -98,6 +111,22 @@ define i8 @select06(i8 %a.0, i8 %m) {
ret i8 %res;
}
+define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
+; CHECK-LABEL: select06_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbw (%rsi), %ax
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: movzbw (%rdi), %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %mask = load <8 x i1> , <8 x i1>* %m
+ %a = load <8 x i1> , <8 x i1>* %a.0
+ %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
+ %res = bitcast <8 x i1> %r to i8
+ ret i8 %res;
+}
define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
; CHECK-LABEL: select07:
; CHECK: ## BB#0:
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll Mon Apr 11 16:10:33 2016
@@ -80,15 +80,33 @@ define void @mask64_mem(i64* %ptr) {
define i32 @mand32(i32 %x, i32 %y) {
; CHECK-LABEL: mand32:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k0
-; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl %esi, %eax
+; CHECK-NEXT: xorl %esi, %edi
+; CHECK-NEXT: orl %eax, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ %ma = bitcast i32 %x to <32 x i1>
+ %mb = bitcast i32 %y to <32 x i1>
+ %mc = and <32 x i1> %ma, %mb
+ %md = xor <32 x i1> %ma, %mb
+ %me = or <32 x i1> %mc, %md
+ %ret = bitcast <32 x i1> %me to i32
+ ret i32 %ret
+}
+
+define i32 @mand32_mem(<32 x i1>* %x, <32 x i1>* %y) {
+; CHECK-LABEL: mand32_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd (%rdi), %k0
+; CHECK-NEXT: kmovd (%rsi), %k1
; CHECK-NEXT: kandd %k1, %k0, %k2
; CHECK-NEXT: kxord %k1, %k0, %k0
; CHECK-NEXT: kord %k0, %k2, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: retq
- %ma = bitcast i32 %x to <32 x i1>
- %mb = bitcast i32 %y to <32 x i1>
+ %ma = load <32 x i1>, <32 x i1>* %x
+ %mb = load <32 x i1>, <32 x i1>* %y
%mc = and <32 x i1> %ma, %mb
%md = xor <32 x i1> %ma, %mb
%me = or <32 x i1> %mc, %md
@@ -99,15 +117,33 @@ define i32 @mand32(i32 %x, i32 %y) {
define i64 @mand64(i64 %x, i64 %y) {
; CHECK-LABEL: mand64:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovq %rdi, %k0
-; CHECK-NEXT: kmovq %rsi, %k1
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: andq %rsi, %rax
+; CHECK-NEXT: xorq %rsi, %rdi
+; CHECK-NEXT: orq %rax, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+ %ma = bitcast i64 %x to <64 x i1>
+ %mb = bitcast i64 %y to <64 x i1>
+ %mc = and <64 x i1> %ma, %mb
+ %md = xor <64 x i1> %ma, %mb
+ %me = or <64 x i1> %mc, %md
+ %ret = bitcast <64 x i1> %me to i64
+ ret i64 %ret
+}
+
+define i64 @mand64_mem(<64 x i1>* %x, <64 x i1>* %y) {
+; CHECK-LABEL: mand64_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq (%rdi), %k0
+; CHECK-NEXT: kmovq (%rsi), %k1
; CHECK-NEXT: kandq %k1, %k0, %k2
; CHECK-NEXT: kxorq %k1, %k0, %k0
; CHECK-NEXT: korq %k0, %k2, %k0
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: retq
- %ma = bitcast i64 %x to <64 x i1>
- %mb = bitcast i64 %y to <64 x i1>
+ %ma = load <64 x i1>, <64 x i1>* %x
+ %mb = load <64 x i1>, <64 x i1>* %y
%mc = and <64 x i1> %ma, %mb
%md = xor <64 x i1> %ma, %mb
%me = or <64 x i1> %mc, %md
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll Mon Apr 11 16:10:33 2016
@@ -32,15 +32,33 @@ define void @mask8_mem(i8* %ptr) {
define i8 @mand8(i8 %x, i8 %y) {
; CHECK-LABEL: mand8:
; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k0
-; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: andl %esi, %edi
+; CHECK-NEXT: orl %eax, %edi
+; CHECK-NEXT: movb %dil, %al
+; CHECK-NEXT: retq
+ %ma = bitcast i8 %x to <8 x i1>
+ %mb = bitcast i8 %y to <8 x i1>
+ %mc = and <8 x i1> %ma, %mb
+ %md = xor <8 x i1> %ma, %mb
+ %me = or <8 x i1> %mc, %md
+ %ret = bitcast <8 x i1> %me to i8
+ ret i8 %ret
+}
+
+define i8 @mand8_mem(<8 x i1>* %x, <8 x i1>* %y) {
+; CHECK-LABEL: mand8_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb (%rdi), %k0
+; CHECK-NEXT: kmovb (%rsi), %k1
; CHECK-NEXT: kandb %k1, %k0, %k2
; CHECK-NEXT: kxorb %k1, %k0, %k0
; CHECK-NEXT: korb %k0, %k2, %k0
; CHECK-NEXT: kmovb %k0, %eax
; CHECK-NEXT: retq
- %ma = bitcast i8 %x to <8 x i1>
- %mb = bitcast i8 %y to <8 x i1>
+ %ma = load <8 x i1>, <8 x i1>* %x
+ %mb = load <8 x i1>, <8 x i1>* %y
%mc = and <8 x i1> %ma, %mb
%md = xor <8 x i1> %ma, %mb
%me = or <8 x i1> %mc, %md
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Mon Apr 11 16:10:33 2016
@@ -1869,48 +1869,34 @@ define <4 x float> @mask_v4f32_4127(<4 x
define <4 x float> @mask_v4f32_0127(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: mask_v4f32_0127:
; SSE2: # BB#0:
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: mask_v4f32_0127:
; SSE3: # BB#0:
-; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE3-NEXT: orps %xmm1, %xmm0
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: mask_v4f32_0127:
; SSSE3: # BB#0:
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_v4f32_0127:
; SSE41: # BB#0:
-; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; SSE41-NEXT: por %xmm2, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: mask_v4f32_0127:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: mask_v4f32_0127:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: mask_v4f32_0127:
+; AVX: # BB#0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
+; AVX-NEXT: retq
%1 = bitcast <4 x float> %a to <2 x i64>
%2 = bitcast <4 x float> %b to <2 x i64>
%3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
@@ -1923,47 +1909,38 @@ define <4 x float> @mask_v4f32_0127(<4 x
define <4 x i32> @mask_v4i32_0127(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: mask_v4i32_0127:
; SSE2: # BB#0:
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: mask_v4i32_0127:
; SSE3: # BB#0:
-; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE3-NEXT: orps %xmm1, %xmm0
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: mask_v4i32_0127:
; SSSE3: # BB#0:
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_v4i32_0127:
; SSE41: # BB#0:
-; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; SSE41-NEXT: por %xmm2, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: mask_v4i32_0127:
; AVX1: # BB#0:
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mask_v4i32_0127:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX2-NEXT: retq
%1 = bitcast <4 x i32> %a to <2 x i64>
%2 = bitcast <4 x i32> %b to <2 x i64>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Mon Apr 11 16:10:33 2016
@@ -2140,40 +2140,31 @@ define <8 x i16> @shuffle_v8i16_8012345u
define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: mask_v8i16_012345ef:
; SSE2: # BB#0:
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mask_v8i16_012345ef:
; SSSE3: # BB#0:
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_v8i16_012345ef:
; SSE41: # BB#0:
-; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; SSE41-NEXT: por %xmm2, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: mask_v8i16_012345ef:
; AVX1: # BB#0:
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mask_v8i16_012345ef:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX2-NEXT: retq
%1 = bitcast <8 x i16> %a to <2 x i64>
%2 = bitcast <8 x i16> %b to <2 x i64>
Modified: llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll Mon Apr 11 16:10:33 2016
@@ -9,24 +9,14 @@
define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v3i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pand %xmm0, %xmm1
-; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_i24_as_v3i8:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movd %esi, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: movd %edi, %xmm1
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X64-SSE-NEXT: pand %xmm0, %xmm1
-; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: andl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <3 x i8>
%2 = bitcast i24 %b to <3 x i8>
@@ -38,24 +28,14 @@ define i24 @and_i24_as_v3i8(i24 %a, i24
define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v3i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pxor %xmm0, %xmm1
-; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_i24_as_v3i8:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movd %esi, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: movd %edi, %xmm1
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X64-SSE-NEXT: pxor %xmm0, %xmm1
-; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: xorl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <3 x i8>
%2 = bitcast i24 %b to <3 x i8>
@@ -67,24 +47,14 @@ define i24 @xor_i24_as_v3i8(i24 %a, i24
define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v3i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: por %xmm0, %xmm1
-; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_i24_as_v3i8:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movd %esi, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: movd %edi, %xmm1
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X64-SSE-NEXT: por %xmm0, %xmm1
-; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: orl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <3 x i8>
%2 = bitcast i24 %b to <3 x i8>
@@ -100,186 +70,14 @@ define i24 @or_i24_as_v3i8(i24 %a, i24 %
define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v8i3:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: movl 12(%ebp), %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movl 8(%ebp), %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X32-SSE-NEXT: pxor %xmm2, %xmm2
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
-; X32-SSE-NEXT: pand %xmm1, %xmm0
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X32-SSE-NEXT: shll $16, %ecx
-; X32-SSE-NEXT: movzwl (%esp), %eax
-; X32-SSE-NEXT: orl %ecx, %eax
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_i24_as_v8i3:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %esi
-; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %edi
-; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $3, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $6, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $9, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $15, %eax
-; X64-SSE-NEXT: movzwl %ax, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: xorl %eax, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $3, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movl %ecx, %esi
-; X64-SSE-NEXT: andl $7, %esi
-; X64-SSE-NEXT: movd %esi, %xmm1
-; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $6, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $9, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $12, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
-; X64-SSE-NEXT: shrl $15, %ecx
-; X64-SSE-NEXT: movzwl %cx, %ecx
-; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
-; X64-SSE-NEXT: pand %xmm0, %xmm1
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: shll $16, %ecx
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: orl %ecx, %eax
+; X64-SSE-NEXT: andl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
@@ -291,186 +89,14 @@ define i24 @and_i24_as_v8i3(i24 %a, i24
define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v8i3:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: movl 12(%ebp), %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movl 8(%ebp), %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X32-SSE-NEXT: pxor %xmm2, %xmm2
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
-; X32-SSE-NEXT: pxor %xmm1, %xmm0
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X32-SSE-NEXT: shll $16, %ecx
-; X32-SSE-NEXT: movzwl (%esp), %eax
-; X32-SSE-NEXT: orl %ecx, %eax
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_i24_as_v8i3:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %esi
-; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %edi
-; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $3, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $6, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $9, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $15, %eax
-; X64-SSE-NEXT: movzwl %ax, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: xorl %eax, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $3, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movl %ecx, %esi
-; X64-SSE-NEXT: andl $7, %esi
-; X64-SSE-NEXT: movd %esi, %xmm1
-; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $6, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $9, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $12, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
-; X64-SSE-NEXT: shrl $15, %ecx
-; X64-SSE-NEXT: movzwl %cx, %ecx
-; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
-; X64-SSE-NEXT: pxor %xmm0, %xmm1
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: shll $16, %ecx
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: orl %ecx, %eax
+; X64-SSE-NEXT: xorl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
@@ -482,186 +108,14 @@ define i24 @xor_i24_as_v8i3(i24 %a, i24
define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v8i3:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: movl 12(%ebp), %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movl 8(%ebp), %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X32-SSE-NEXT: pxor %xmm2, %xmm2
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
-; X32-SSE-NEXT: por %xmm1, %xmm0
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X32-SSE-NEXT: shll $16, %ecx
-; X32-SSE-NEXT: movzwl (%esp), %eax
-; X32-SSE-NEXT: orl %ecx, %eax
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_i24_as_v8i3:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %esi
-; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %edi
-; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $3, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $6, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $9, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $15, %eax
-; X64-SSE-NEXT: movzwl %ax, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: xorl %eax, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $3, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movl %ecx, %esi
-; X64-SSE-NEXT: andl $7, %esi
-; X64-SSE-NEXT: movd %esi, %xmm1
-; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $6, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $9, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
-; X64-SSE-NEXT: movl %ecx, %edx
-; X64-SSE-NEXT: shrl $12, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
-; X64-SSE-NEXT: shrl $15, %ecx
-; X64-SSE-NEXT: movzwl %cx, %ecx
-; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
-; X64-SSE-NEXT: por %xmm0, %xmm1
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: shll $16, %ecx
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: orl %ecx, %eax
+; X64-SSE-NEXT: orl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
@@ -677,22 +131,16 @@ define i24 @or_i24_as_v8i3(i24 %a, i24 %
define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v3i8_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: movd %xmm0, %ecx
-; X32-SSE-NEXT: andl %eax, %ecx
-; X32-SSE-NEXT: movd %ecx, %xmm0
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
-; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
-; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: pand %xmm0, %xmm1
+; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
+; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v3i8_as_i24:
@@ -700,20 +148,13 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
-; X64-SSE-NEXT: pshufb %xmm1, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: movd %edi, %xmm0
-; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
-; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
-; X64-SSE-NEXT: pshufb %xmm1, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %ecx
-; X64-SSE-NEXT: andl %eax, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
-; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
-; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
+; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
+; X64-SSE-NEXT: pand %xmm0, %xmm1
+; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
+; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
@@ -725,22 +166,16 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8
define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v3i8_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: movd %xmm0, %ecx
-; X32-SSE-NEXT: xorl %eax, %ecx
-; X32-SSE-NEXT: movd %ecx, %xmm0
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
-; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
-; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: pxor %xmm0, %xmm1
+; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
+; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v3i8_as_i24:
@@ -748,20 +183,13 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
-; X64-SSE-NEXT: pshufb %xmm1, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: movd %edi, %xmm0
-; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
-; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
-; X64-SSE-NEXT: pshufb %xmm1, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %ecx
-; X64-SSE-NEXT: xorl %eax, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
-; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
-; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
+; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
+; X64-SSE-NEXT: pxor %xmm0, %xmm1
+; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
+; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
@@ -773,22 +201,16 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8
define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v3i8_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: movd %xmm0, %ecx
-; X32-SSE-NEXT: orl %eax, %ecx
-; X32-SSE-NEXT: movd %ecx, %xmm0
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
-; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
-; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: por %xmm0, %xmm1
+; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
+; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v3i8_as_i24:
@@ -796,20 +218,13 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8>
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
-; X64-SSE-NEXT: pshufb %xmm1, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: movd %edi, %xmm0
-; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
-; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
-; X64-SSE-NEXT: pshufb %xmm1, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %ecx
-; X64-SSE-NEXT: orl %eax, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
-; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
-; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
+; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
+; X64-SSE-NEXT: por %xmm0, %xmm1
+; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
+; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
+; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
@@ -825,186 +240,12 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8>
define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: and_v8i3_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: shll $16, %eax
-; X32-SSE-NEXT: movzwl (%esp), %ecx
-; X32-SSE-NEXT: orl %eax, %ecx
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X32-SSE-NEXT: shll $16, %edx
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: orl %edx, %eax
-; X32-SSE-NEXT: andl %ecx, %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X32-SSE-NEXT: pxor %xmm0, %xmm0
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: andps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v8i3_as_i24:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: shll $16, %eax
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: orl %eax, %ecx
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: shll $16, %eax
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
-; X64-SSE-NEXT: orl %eax, %edx
-; X64-SSE-NEXT: andl %ecx, %edx
-; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %edx
-; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $3, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $6, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $9, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $15, %eax
-; X64-SSE-NEXT: movzwl %ax, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: xorl %eax, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: andps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i3> %a to i24
%2 = bitcast <8 x i3> %b to i24
@@ -1016,186 +257,12 @@ define <8 x i3> @and_v8i3_as_i24(<8 x i3
define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: xor_v8i3_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: shll $16, %eax
-; X32-SSE-NEXT: movzwl (%esp), %ecx
-; X32-SSE-NEXT: orl %eax, %ecx
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X32-SSE-NEXT: shll $16, %edx
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: orl %edx, %eax
-; X32-SSE-NEXT: xorl %ecx, %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X32-SSE-NEXT: pxor %xmm0, %xmm0
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: xorps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v8i3_as_i24:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: shll $16, %eax
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: orl %eax, %ecx
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: shll $16, %eax
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
-; X64-SSE-NEXT: orl %eax, %edx
-; X64-SSE-NEXT: xorl %ecx, %edx
-; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %edx
-; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $3, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $6, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $9, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $15, %eax
-; X64-SSE-NEXT: movzwl %ax, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: xorl %eax, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: xorps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i3> %a to i24
%2 = bitcast <8 x i3> %b to i24
@@ -1207,186 +274,12 @@ define <8 x i3> @xor_v8i3_as_i24(<8 x i3
define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: or_v8i3_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: shll $16, %eax
-; X32-SSE-NEXT: movzwl (%esp), %ecx
-; X32-SSE-NEXT: orl %eax, %ecx
-; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X32-SSE-NEXT: shll $16, %edx
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: orl %edx, %eax
-; X32-SSE-NEXT: orl %ecx, %eax
-; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: shrl $16, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $3, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $7, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $6, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $9, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $7, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $15, %eax
-; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X32-SSE-NEXT: pxor %xmm0, %xmm0
-; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: orps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v8i3_as_i24:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: shll $16, %eax
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE-NEXT: orl %eax, %ecx
-; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: shll $16, %eax
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
-; X64-SSE-NEXT: orl %eax, %edx
-; X64-SSE-NEXT: orl %ecx, %edx
-; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: shrl $16, %edx
-; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $3, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $7, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $6, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $9, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $7, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $15, %eax
-; X64-SSE-NEXT: movzwl %ax, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: xorl %eax, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: orps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i3> %a to i24
%2 = bitcast <8 x i3> %b to i24
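
For reference, every test in these widen_bitops files boils down to the same IR shape: a logic op performed on a bitcast view of its operands, followed by a bitcast back. Below is a minimal sketch of that pattern (hypothetical function name; the real bodies are the truncated definitions visible in the hunks above and below):

  define <4 x i8> @sketch_and_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind {
    %1 = bitcast <4 x i8> %a to i32   ; reinterpret the four lanes as one scalar
    %2 = bitcast <4 x i8> %b to i32
    %3 = and i32 %1, %2               ; logic op on the scalar view
    %4 = bitcast i32 %3 to <4 x i8>   ; back to the vector view
    ret <4 x i8> %4
  }

widen_bitops-1.ll exercises the opposite direction (i32 arguments viewed as <4 x i8> or <8 x i4>). In both cases the bitcast pair around the op is now stripped, so the new CHECK lines collapse to a single andps/orps/xorps for the vector-result tests and a plain andl/orl/xorl for the scalar-result ones, replacing the old extract/insert/shuffle sequences.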
Modified: llvm/trunk/test/CodeGen/X86/widen_bitops-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_bitops-1.ll?rev=265998&r1=265997&r2=265998&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_bitops-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_bitops-1.ll Mon Apr 11 16:10:33 2016
@@ -9,24 +9,14 @@
define i32 @and_i32_as_v4i8(i32 %a, i32 %b) nounwind {
; X32-SSE-LABEL: and_i32_as_v4i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %eax
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pand %xmm0, %xmm1
-; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: popl %ecx
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_i32_as_v4i8:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movd %esi, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: movd %edi, %xmm1
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X64-SSE-NEXT: pand %xmm0, %xmm1
-; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: andl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i32 %a to <4 x i8>
%2 = bitcast i32 %b to <4 x i8>
@@ -38,24 +28,14 @@ define i32 @and_i32_as_v4i8(i32 %a, i32
define i32 @xor_i32_as_v4i8(i32 %a, i32 %b) nounwind {
; X32-SSE-LABEL: xor_i32_as_v4i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %eax
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pxor %xmm0, %xmm1
-; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: popl %ecx
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_i32_as_v4i8:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movd %esi, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: movd %edi, %xmm1
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X64-SSE-NEXT: pxor %xmm0, %xmm1
-; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: xorl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i32 %a to <4 x i8>
%2 = bitcast i32 %b to <4 x i8>
@@ -67,24 +47,14 @@ define i32 @xor_i32_as_v4i8(i32 %a, i32
define i32 @or_i32_as_v4i8(i32 %a, i32 %b) nounwind {
; X32-SSE-LABEL: or_i32_as_v4i8:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %eax
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; X32-SSE-NEXT: por %xmm0, %xmm1
-; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: popl %ecx
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_i32_as_v4i8:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movd %esi, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X64-SSE-NEXT: movd %edi, %xmm1
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; X64-SSE-NEXT: por %xmm0, %xmm1
-; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
-; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: orl %esi, %edi
+; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i32 %a to <4 x i8>
%2 = bitcast i32 %b to <4 x i8>
@@ -100,186 +70,14 @@ define i32 @or_i32_as_v4i8(i32 %a, i32 %
define i32 @and_i32_as_v8i4(i32 %a, i32 %b) nounwind {
; X32-SSE-LABEL: and_i32_as_v8i4:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: movl 12(%ebp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X32-SSE-NEXT: movl 8(%ebp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1
-; X32-SSE-NEXT: pand %xmm0, %xmm1
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movl (%esp), %eax
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_i32_as_v8i4:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $4, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movl %esi, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $8, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $12, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $16, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $20, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $24, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: shrl $28, %esi
-; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0
+; X64-SSE-NEXT: andl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $4, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movl %edi, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm1
-; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $8, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $12, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $16, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $20, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $24, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
-; X64-SSE-NEXT: shrl $28, %edi
-; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1
-; X64-SSE-NEXT: pand %xmm0, %xmm1
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: retq
%1 = bitcast i32 %a to <8 x i4>
%2 = bitcast i32 %b to <8 x i4>
@@ -291,186 +89,14 @@ define i32 @and_i32_as_v8i4(i32 %a, i32
define i32 @xor_i32_as_v8i4(i32 %a, i32 %b) nounwind {
; X32-SSE-LABEL: xor_i32_as_v8i4:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: movl 12(%ebp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X32-SSE-NEXT: movl 8(%ebp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1
-; X32-SSE-NEXT: pxor %xmm0, %xmm1
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movl (%esp), %eax
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_i32_as_v8i4:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $4, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movl %esi, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $8, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $12, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $16, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $20, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $24, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: shrl $28, %esi
-; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $4, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movl %edi, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm1
-; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1
+; X64-SSE-NEXT: xorl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $8, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $12, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $16, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $20, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $24, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
-; X64-SSE-NEXT: shrl $28, %edi
-; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1
-; X64-SSE-NEXT: pxor %xmm0, %xmm1
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: retq
%1 = bitcast i32 %a to <8 x i4>
%2 = bitcast i32 %b to <8 x i4>
@@ -482,186 +108,14 @@ define i32 @xor_i32_as_v8i4(i32 %a, i32
define i32 @or_i32_as_v8i4(i32 %a, i32 %b) nounwind {
; X32-SSE-LABEL: or_i32_as_v8i4:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: movl 12(%ebp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X32-SSE-NEXT: movl 8(%ebp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm1
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1
-; X32-SSE-NEXT: por %xmm0, %xmm1
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movl (%esp), %eax
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_i32_as_v8i4:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $4, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movl %esi, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $8, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $12, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $16, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $20, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
-; X64-SSE-NEXT: movl %esi, %eax
-; X64-SSE-NEXT: shrl $24, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
-; X64-SSE-NEXT: shrl $28, %esi
-; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $4, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movl %edi, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm1
-; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $8, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1
+; X64-SSE-NEXT: orl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $12, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $16, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $20, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1
-; X64-SSE-NEXT: movl %edi, %eax
-; X64-SSE-NEXT: shrl $24, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
-; X64-SSE-NEXT: shrl $28, %edi
-; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1
-; X64-SSE-NEXT: por %xmm0, %xmm1
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: retq
%1 = bitcast i32 %a to <8 x i4>
%2 = bitcast i32 %b to <8 x i4>
@@ -677,28 +131,12 @@ define i32 @or_i32_as_v8i4(i32 %a, i32 %
define <4 x i8> @and_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v4i8_as_i32:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; X32-SSE-NEXT: pshufb %xmm2, %xmm1
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: pshufb %xmm2, %xmm0
-; X32-SSE-NEXT: movd %xmm0, %ecx
-; X32-SSE-NEXT: andl %eax, %ecx
-; X32-SSE-NEXT: movd %ecx, %xmm0
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: andps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v4i8_as_i32:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; X64-SSE-NEXT: pshufb %xmm2, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: pshufb %xmm2, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %ecx
-; X64-SSE-NEXT: andl %eax, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: andps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <4 x i8> %a to i32
%2 = bitcast <4 x i8> %b to i32
@@ -710,28 +148,12 @@ define <4 x i8> @and_v4i8_as_i32(<4 x i8
define <4 x i8> @xor_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v4i8_as_i32:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; X32-SSE-NEXT: pshufb %xmm2, %xmm1
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: pshufb %xmm2, %xmm0
-; X32-SSE-NEXT: movd %xmm0, %ecx
-; X32-SSE-NEXT: xorl %eax, %ecx
-; X32-SSE-NEXT: movd %ecx, %xmm0
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: xorps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v4i8_as_i32:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; X64-SSE-NEXT: pshufb %xmm2, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: pshufb %xmm2, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %ecx
-; X64-SSE-NEXT: xorl %eax, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: xorps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <4 x i8> %a to i32
%2 = bitcast <4 x i8> %b to i32
@@ -743,28 +165,12 @@ define <4 x i8> @xor_v4i8_as_i32(<4 x i8
define <4 x i8> @or_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v4i8_as_i32:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: subl $12, %esp
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; X32-SSE-NEXT: pshufb %xmm2, %xmm1
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: pshufb %xmm2, %xmm0
-; X32-SSE-NEXT: movd %xmm0, %ecx
-; X32-SSE-NEXT: orl %eax, %ecx
-; X32-SSE-NEXT: movd %ecx, %xmm0
-; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: orps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v4i8_as_i32:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; X64-SSE-NEXT: pshufb %xmm2, %xmm1
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: pshufb %xmm2, %xmm0
-; X64-SSE-NEXT: movd %xmm0, %ecx
-; X64-SSE-NEXT: orl %eax, %ecx
-; X64-SSE-NEXT: movd %ecx, %xmm0
-; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: orps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <4 x i8> %a to i32
%2 = bitcast <4 x i8> %b to i32
@@ -780,174 +186,12 @@ define <4 x i8> @or_v4i8_as_i32(<4 x i8>
define <8 x i4> @and_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind {
; X32-SSE-LABEL: and_v8i4_as_i32:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: andl (%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: andps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v8i4_as_i32:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: andl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $4, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $15, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $8, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $16, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $20, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $24, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $28, %eax
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: andps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i4> %a to i32
%2 = bitcast <8 x i4> %b to i32
@@ -959,174 +203,12 @@ define <8 x i4> @and_v8i4_as_i32(<8 x i4
define <8 x i4> @xor_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind {
; X32-SSE-LABEL: xor_v8i4_as_i32:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: xorl (%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: xorps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v8i4_as_i32:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: xorl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $4, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $15, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $8, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $16, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $20, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $24, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $28, %eax
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: xorps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i4> %a to i32
%2 = bitcast <8 x i4> %b to i32
@@ -1138,174 +220,12 @@ define <8 x i4> @xor_v8i4_as_i32(<8 x i4
define <8 x i4> @or_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind {
; X32-SSE-LABEL: or_v8i4_as_i32:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pushl %ebp
-; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: andl $-8, %esp
-; X32-SSE-NEXT: subl $24, %esp
-; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: andl $15, %eax
-; X32-SSE-NEXT: movb %al, (%esp)
-; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT: orl (%esp), %eax
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $4, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: movl %eax, %edx
-; X32-SSE-NEXT: andl $15, %edx
-; X32-SSE-NEXT: movd %edx, %xmm0
-; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $8, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $12, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $16, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $20, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X32-SSE-NEXT: movl %eax, %ecx
-; X32-SSE-NEXT: shrl $24, %ecx
-; X32-SSE-NEXT: andl $15, %ecx
-; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X32-SSE-NEXT: shrl $28, %eax
-; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
-; X32-SSE-NEXT: movl %ebp, %esp
-; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: orps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v8i4_as_i32:
; X64-SSE: # BB#0:
-; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm0, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movd %xmm1, %eax
-; X64-SSE-NEXT: andl $15, %eax
-; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: orl -{{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $4, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: movl %eax, %edx
-; X64-SSE-NEXT: andl $15, %edx
-; X64-SSE-NEXT: movd %edx, %xmm0
-; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $8, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $12, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $16, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $20, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0
-; X64-SSE-NEXT: movl %eax, %ecx
-; X64-SSE-NEXT: shrl $24, %ecx
-; X64-SSE-NEXT: andl $15, %ecx
-; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0
-; X64-SSE-NEXT: shrl $28, %eax
-; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: orps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i4> %a to i32
%2 = bitcast <8 x i4> %b to i32
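
Each of these v8i4 check blocks collapses to a single andps/xorps/orps because the scalar op's operands are both bitcasts from <8 x i4>: once the combine runs before type legalization, the op is performed directly on the vector operands and the surrounding bitcasts cancel, so no per-element extraction is needed. A minimal IR sketch of that pattern (illustrative only; the function name and value names here are hypothetical and not copied from the test file):

  define <8 x i4> @or_v8i4_via_i32_sketch(<8 x i4> %a, <8 x i4> %b) nounwind {
    ; Route both <8 x i4> inputs through i32 so the OR is nominally scalar.
    %a32 = bitcast <8 x i4> %a to i32
    %b32 = bitcast <8 x i4> %b to i32
    ; or(bitcast(A), bitcast(B)) is rewritten as bitcast(or(A, B)), the
    ; inner and outer bitcasts cancel, and the function lowers to a single
    ; vector OR (orps in the updated checks above).
    %r32 = or i32 %a32, %b32
    %r = bitcast i32 %r32 to <8 x i4>
    ret <8 x i4> %r
  }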