[llvm] r294581 - [X86][SSE] Attempt to break register dependencies during lowerBuildVector
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 9 03:50:20 PST 2017
Author: rksimon
Date: Thu Feb 9 05:50:19 2017
New Revision: 294581
URL: http://llvm.org/viewvc/llvm-project?rev=294581&view=rev
Log:
[X86][SSE] Attempt to break register dependencies during lowerBuildVector
LowerBuildVectorv16i8/LowerBuildVectorv8i16 insert values into a UNDEF vector if the build vector doesn't contain any zero elements, resulting in register dependencies with a previous use of the register.
This patch attempts to break the register dependency by either always zeroing the vector before hand or (if we're inserting to the 0'th element) by using VZEXT_MOVL(SCALAR_TO_VECTOR(i32 AEXT(Elt))) which lowers to (V)MOVD and performs a similar function. Additionally (V)MOVD is a shorter instruction than PINSRB/PINSRW. We already do something similar for SSE41 PINSRD.
On pre-SSE41 LowerBuildVectorv16i8 we go a little further and use VZEXT_MOVL(SCALAR_TO_VECTOR(i32 ZEXT(Elt))) if the build vector contains zeros to avoid the vector zeroing at the cost of a scalar zero extension, which can probably be brought over to the other cases in a future patch in some cases (load folding etc.)
Differential Revision: https://reviews.llvm.org/D29720
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
llvm/trunk/test/CodeGen/X86/promote-vec3.ll
llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll
llvm/trunk/test/CodeGen/X86/vec_cast2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll
llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
llvm/trunk/test/CodeGen/X86/widen_conv-4.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb 9 05:50:19 2017
@@ -5942,12 +5942,21 @@ static SDValue LowerBuildVectorv16i8(SDV
for (unsigned i = 0; i < 16; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0;
if (IsNonZero) {
+ // If the build vector contains zeros or our first insertion is not the
+ // first index then insert into zero vector to break any register
+ // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
- if (NumZero)
- V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
- else
- V = DAG.getUNDEF(MVT::v16i8);
First = false;
+ if (NumZero || 0 != i)
+ V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
+ else {
+ assert(0 == i && "Expected insertion into zero-index");
+ V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v16i8, V);
+ continue;
+ }
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
@@ -5969,6 +5978,8 @@ static SDValue LowerBuildVectorv16i8(SDV
}
if ((i & 1) != 0) {
+ // FIXME: Investigate extending to i32 instead of just i16.
+ // FIXME: Investigate combining the first 4 bytes as a i32 instead.
SDValue ThisElt, LastElt;
bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
if (LastIsNonZero) {
@@ -5984,9 +5995,18 @@ static SDValue LowerBuildVectorv16i8(SDV
} else
ThisElt = LastElt;
- if (ThisElt)
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
- DAG.getIntPtrConstant(i / 2, dl));
+ if (ThisElt) {
+ if (1 == i) {
+ V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
+ : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ } else {
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i / 2, dl));
+ }
+ }
}
}
@@ -6007,12 +6027,21 @@ static SDValue LowerBuildVectorv8i16(SDV
for (unsigned i = 0; i < 8; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0;
if (IsNonZero) {
+ // If the build vector contains zeros or our first insertion is not the
+ // first index then insert into zero vector to break any register
+ // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
- if (NumZero)
- V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
- else
- V = DAG.getUNDEF(MVT::v8i16);
First = false;
+ if (NumZero || 0 != i)
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ else {
+ assert(0 == i && "Expected insertion into zero-index");
+ V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ continue;
+ }
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Thu Feb 9 05:50:19 2017
@@ -1062,7 +1062,7 @@ define <8 x i8> @test_cmp_q_512(<8 x i64
; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k4, %eax
; CHECK-NEXT: kmovw %k3, %ecx
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
@@ -1110,7 +1110,7 @@ define <8 x i8> @test_mask_cmp_q_512(<8
; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k3 {%k3}
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: kmovw %k4, %ecx
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k6, %eax
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
@@ -1159,7 +1159,7 @@ define <8 x i8> @test_ucmp_q_512(<8 x i6
; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k4, %eax
; CHECK-NEXT: kmovw %k3, %ecx
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
@@ -1207,7 +1207,7 @@ define <8 x i8> @test_mask_ucmp_q_512(<8
; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k3 {%k3}
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: kmovw %k4, %ecx
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k6, %eax
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Thu Feb 9 05:50:19 2017
@@ -696,7 +696,7 @@ define <8 x i8> @test_cmp_w_128(<8 x i16
; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x07]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -744,7 +744,7 @@ define <8 x i8> @test_mask_cmp_w_128(<8
; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd9,0x07]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -793,7 +793,7 @@ define <8 x i8> @test_ucmp_w_128(<8 x i1
; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x07]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -841,7 +841,7 @@ define <8 x i8> @test_mask_ucmp_w_128(<8
; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd9,0x07]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Thu Feb 9 05:50:19 2017
@@ -16,7 +16,7 @@ define <8 x i8> @test_cmp_d_256(<8 x i32
; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x07]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -64,7 +64,7 @@ define <8 x i8> @test_mask_cmp_d_256(<8
; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xd9,0x07]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -113,7 +113,7 @@ define <8 x i8> @test_ucmp_d_256(<8 x i3
; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x07]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -161,7 +161,7 @@ define <8 x i8> @test_mask_ucmp_d_256(<8
; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xd9,0x07]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -210,7 +210,7 @@ define <8 x i8> @test_cmp_q_256(<4 x i64
; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -258,7 +258,7 @@ define <8 x i8> @test_mask_cmp_q_256(<4
; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -307,7 +307,7 @@ define <8 x i8> @test_ucmp_q_256(<4 x i6
; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -355,7 +355,7 @@ define <8 x i8> @test_mask_ucmp_q_256(<4
; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -406,7 +406,7 @@ define <8 x i8> @test_cmp_d_128(<4 x i32
; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -454,7 +454,7 @@ define <8 x i8> @test_mask_cmp_d_128(<4
; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -503,7 +503,7 @@ define <8 x i8> @test_ucmp_d_128(<4 x i3
; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -551,7 +551,7 @@ define <8 x i8> @test_mask_ucmp_d_128(<4
; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -600,7 +600,7 @@ define <8 x i8> @test_cmp_q_128(<2 x i64
; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -648,7 +648,7 @@ define <8 x i8> @test_mask_cmp_q_128(<2
; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -697,7 +697,7 @@ define <8 x i8> @test_ucmp_q_128(<2 x i6
; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x07]
; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
@@ -745,7 +745,7 @@ define <8 x i8> @test_mask_ucmp_q_128(<2
; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf9,0x07]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
-; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
Modified: llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/buildvec-insertvec.ll Thu Feb 9 05:50:19 2017
@@ -270,6 +270,7 @@ define <8 x i16> @test_buildvector_v8i16
define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
; CHECK-LABEL: test_buildvector_v8i16_partial:
; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: pinsrw $1, %edi, %xmm0
; CHECK-NEXT: pinsrw $3, %esi, %xmm0
; CHECK-NEXT: pinsrw $4, %edx, %xmm0
@@ -419,6 +420,7 @@ define <16 x i8> @test_buildvector_v16i8
;
; SSE41-LABEL: test_buildvector_v16i8_partial:
; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pinsrb $2, %edi, %xmm0
; SSE41-NEXT: pinsrb $6, %esi, %xmm0
; SSE41-NEXT: pinsrb $8, %edx, %xmm0
@@ -448,10 +450,9 @@ define <16 x i8> @test_buildvector_v16i8
define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
; SSE2-LABEL: test_buildvector_v16i8_register_zero:
; SSE2: # BB#0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: pxor %xmm0, %xmm0
-; SSE2-NEXT: pinsrw $0, %eax, %xmm0
; SSE2-NEXT: movzbl %sil, %eax
+; SSE2-NEXT: movzbl %dil, %esi
+; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: pinsrw $2, %eax, %xmm0
; SSE2-NEXT: movzbl %dl, %eax
; SSE2-NEXT: pinsrw $3, %eax, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/promote-vec3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/promote-vec3.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/promote-vec3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/promote-vec3.ll Thu Feb 9 05:50:19 2017
@@ -9,17 +9,16 @@ define <3 x i16> @zext_i8(<3 x i8>) {
; SSE3-LABEL: zext_i8:
; SSE3: # BB#0:
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: pxor %xmm0, %xmm0
-; SSE3-NEXT: pxor %xmm1, %xmm1
-; SSE3-NEXT: pinsrw $0, %eax, %xmm1
+; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: pinsrw $1, %eax, %xmm1
+; SSE3-NEXT: pinsrw $1, %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: pinsrw $2, %eax, %xmm1
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE3-NEXT: movd %xmm1, %eax
-; SSE3-NEXT: pextrw $2, %xmm1, %edx
-; SSE3-NEXT: pextrw $4, %xmm1, %ecx
+; SSE3-NEXT: pinsrw $2, %eax, %xmm0
+; SSE3-NEXT: pxor %xmm1, %xmm1
+; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: pextrw $2, %xmm0, %edx
+; SSE3-NEXT: pextrw $4, %xmm0, %ecx
; SSE3-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE3-NEXT: # kill: %DX<def> %DX<kill> %EDX<kill>
; SSE3-NEXT: # kill: %CX<def> %CX<kill> %ECX<kill>
@@ -74,7 +73,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
; SSE3-LABEL: sext_i8:
; SSE3: # BB#0:
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; SSE3-NEXT: pinsrw $0, %eax, %xmm0
+; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $1, %eax, %xmm0
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -93,7 +92,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
;
; SSE41-LABEL: sext_i8:
; SSE41: # BB#0:
-; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
+; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pslld $24, %xmm0
@@ -108,7 +107,7 @@ define <3 x i16> @sext_i8(<3 x i8>) {
;
; AVX-32-LABEL: sext_i8:
; AVX-32: # BB#0:
-; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpslld $24, %xmm0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll Thu Feb 9 05:50:19 2017
@@ -482,7 +482,7 @@ define void @shuffle_v64i8_to_v8i8(<64 x
; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx
; AVX512BW-NEXT: vpextrb $0, %xmm0, %edi
-; AVX512BW-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %edi, %xmm0
; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
; AVX512BW-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
@@ -496,9 +496,9 @@ define void @shuffle_v64i8_to_v8i8(<64 x
; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu8 (%rdi), %zmm0
-; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512BWVL-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
; AVX512BWVL-NEXT: vpextrb $8, %xmm0, %eax
+; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %ecx
+; AVX512BWVL-NEXT: vmovd %ecx, %xmm1
; AVX512BWVL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
; AVX512BWVL-NEXT: vpextrb $0, %xmm2, %eax
Modified: llvm/trunk/test/CodeGen/X86/vec_cast2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cast2.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cast2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cast2.ll Thu Feb 9 05:50:19 2017
@@ -97,10 +97,10 @@ define <8 x i8> @foo3_8(<8 x float> %src
;
; CHECK-WIDE-LABEL: foo3_8:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
+; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
@@ -134,10 +134,10 @@ define <4 x i8> @foo3_4(<4 x float> %src
;
; CHECK-WIDE-LABEL: foo3_4:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
+; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Thu Feb 9 05:50:19 2017
@@ -1718,17 +1718,17 @@ define <16 x i8> @PR31364(i8* nocapture
; SSE2-NEXT: movzbl (%rsi), %ecx
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: orl %eax, %ecx
-; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: movzwl %cx, %eax
+; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pinsrw $0, %ecx, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm0[0,1,2,3,5,4,4,4]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,7]
-; SSE2-NEXT: packuswb %xmm2, %xmm0
+; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR31364:
@@ -1737,8 +1737,8 @@ define <16 x i8> @PR31364(i8* nocapture
; SSSE3-NEXT: movzbl (%rsi), %ecx
; SSSE3-NEXT: shll $8, %ecx
; SSSE3-NEXT: orl %eax, %ecx
-; SSSE3-NEXT: pxor %xmm0, %xmm0
-; SSSE3-NEXT: pinsrw $0, %ecx, %xmm0
+; SSSE3-NEXT: movzwl %cx, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
; SSSE3-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_bitops-0.ll Thu Feb 9 05:50:19 2017
@@ -131,10 +131,10 @@ define i24 @or_i24_as_v8i3(i24 %a, i24 %
define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v3i8_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pand %xmm0, %xmm1
@@ -172,10 +172,10 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8
define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v3i8_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pxor %xmm0, %xmm1
@@ -213,10 +213,10 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8
define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v3i8_as_i24:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
+; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
-; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1
+; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: por %xmm0, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-3.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-3.ll Thu Feb 9 05:50:19 2017
@@ -65,7 +65,7 @@ define void @convert_v3i8_to_v3f32(<3 x
; X86-SSE2-NEXT: shll $8, %edx
; X86-SSE2-NEXT: movzbl (%esp), %esi
; X86-SSE2-NEXT: orl %edx, %esi
-; X86-SSE2-NEXT: pinsrw $0, %esi, %xmm0
+; X86-SSE2-NEXT: movd %esi, %xmm0
; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -115,7 +115,7 @@ define void @convert_v3i8_to_v3f32(<3 x
; X64-SSE2-NEXT: shll $8, %eax
; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE2-NEXT: orl %eax, %ecx
-; X64-SSE2-NEXT: pinsrw $0, %ecx, %xmm0
+; X64-SSE2-NEXT: movd %ecx, %xmm0
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
Modified: llvm/trunk/test/CodeGen/X86/widen_conv-4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-4.ll?rev=294581&r1=294580&r2=294581&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-4.ll Thu Feb 9 05:50:19 2017
@@ -91,7 +91,7 @@ define void @convert_v3i8_to_v3f32(<3 x
; X86-SSE2-NEXT: shll $8, %edx
; X86-SSE2-NEXT: movzbl (%esp), %esi
; X86-SSE2-NEXT: orl %edx, %esi
-; X86-SSE2-NEXT: pinsrw $0, %esi, %xmm0
+; X86-SSE2-NEXT: movd %esi, %xmm0
; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx
; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0
; X86-SSE2-NEXT: pxor %xmm1, %xmm1
@@ -140,7 +140,7 @@ define void @convert_v3i8_to_v3f32(<3 x
; X64-SSE2-NEXT: shll $8, %eax
; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE2-NEXT: orl %eax, %ecx
-; X64-SSE2-NEXT: pinsrw $0, %ecx, %xmm0
+; X64-SSE2-NEXT: movd %ecx, %xmm0
; X64-SSE2-NEXT: movzbl 2(%rsi), %eax
; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
More information about the llvm-commits
mailing list