[llvm] 4e21140 - [X86] combineVectorShiftImm - fold (shift (logic X, C2), C1) -> (logic (shift X, C1), (shift C2, C1))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 30 04:47:37 PDT 2023
Author: Simon Pilgrim
Date: 2023-03-30T12:47:27+01:00
New Revision: 4e21140465caad9bf666e02a2396b2acb961e313
URL: https://github.com/llvm/llvm-project/commit/4e21140465caad9bf666e02a2396b2acb961e313
DIFF: https://github.com/llvm/llvm-project/commit/4e21140465caad9bf666e02a2396b2acb961e313.diff
LOG: [X86] combineVectorShiftImm - fold (shift (logic X, C2), C1) -> (logic (shift X, C1), (shift C2, C1))
Helps expose a number of cases that we could/should reassociate - if only the vector constants hadn't already been lowered :(
It also forms a number of additional VPTERNLOG nodes, bringing together logic ops that had been stuck on either side of a shift op.
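
As a sanity check on the identity being folded (a minimal standalone C++ sketch, not part of this patch - the constants are illustrative, and arithmetic right shift of a negative signed value is only guaranteed to sign-extend from C++20 onwards):

  #include <cassert>
  #include <cstdint>

  int main() {
    // (shift (logic X, C2), C1) == (logic (shift X, C1), (shift C2, C1))
    // holds for and/or/xor with shl, lshr and ashr alike, since the
    // constant operand C2 can be shifted at compile time.
    uint32_t X = 0x12345678, C2 = 0x0000FF00;
    unsigned C1 = 4;
    assert(((X & C2) << C1) == ((X << C1) & (C2 << C1))); // shl + and
    assert(((X | C2) >> C1) == ((X >> C1) | (C2 >> C1))); // lshr + or
    assert(((X ^ C2) << C1) == ((X << C1) ^ (C2 << C1))); // shl + xor
    // ashr distributes too: the sign bit of (X op C2) is the op of the
    // sign bits, so the sign-extended copies agree.
    int32_t SX = INT32_MIN | 0x345678;
    assert(((SX & (int32_t)C2) >> C1) == ((SX >> C1) & ((int32_t)C2 >> C1)));
    return 0;
  }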
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-rotates.ll
llvm/test/CodeGen/X86/combine-sdiv.ll
llvm/test/CodeGen/X86/combine-shl.ll
llvm/test/CodeGen/X86/combine-srl.ll
llvm/test/CodeGen/X86/dpbusd_i4.ll
llvm/test/CodeGen/X86/funnel-shift-rot.ll
llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
llvm/test/CodeGen/X86/shrink-const.ll
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8db9372f9ab40..2a8a7d14aca89 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48847,11 +48847,11 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
- assert(N->getOperand(1).getValueType() == MVT::i8 &&
- "Unexpected shift amount type");
+ assert(N1.getValueType() == MVT::i8 && "Unexpected shift amount type");
// (shift undef, X) -> 0
if (N0.isUndef())
@@ -48911,11 +48911,11 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
return Res;
}
- // Constant Folding.
- APInt UndefElts;
- SmallVector<APInt, 32> EltBits;
- if (N->isOnlyUserOf(N0.getNode()) &&
- getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
+ auto TryConstantFold = [&](SDValue V) {
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (!getTargetConstantBitsFromNode(V, NumBitsPerElt, UndefElts, EltBits))
+ return SDValue();
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
// Undef elements need to fold to 0. It's possible SimplifyDemandedBits
@@ -48935,6 +48935,26 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
// Reset undef elements since they were zeroed above.
UndefElts = 0;
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
+ };
+
+ // Constant Folding.
+ if (N->isOnlyUserOf(N0.getNode())) {
+ if (SDValue C = TryConstantFold(N0))
+ return C;
+
+ // Fold (shift (logic X, C2), C1) -> (logic (shift X, C1), (shift C2, C1))
+ // Don't break NOT patterns.
+ SDValue BC = peekThroughOneUseBitcasts(N0);
+ if (ISD::isBitwiseLogicOp(BC.getOpcode()) &&
+ BC->isOnlyUserOf(BC.getOperand(1).getNode()) &&
+ !ISD::isBuildVectorAllOnes(BC.getOperand(1).getNode())) {
+ if (SDValue RHS = TryConstantFold(BC.getOperand(1))) {
+ SDLoc DL(N);
+ SDValue LHS = DAG.getNode(Opcode, DL, VT,
+ DAG.getBitcast(VT, BC.getOperand(0)), N1);
+ return DAG.getNode(BC.getOpcode(), DL, VT, LHS, RHS);
+ }
+ }
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll
index dee22d10d8fcb..09e97dbe39711 100644
--- a/llvm/test/CodeGen/X86/combine-rotates.ll
+++ b/llvm/test/CodeGen/X86/combine-rotates.ll
@@ -117,26 +117,25 @@ define <4 x i32> @combine_vec_rot_select_zero(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: combine_vec_rot_select_zero:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [31,31,31,31]
-; SSE2-NEXT: pand %xmm1, %xmm3
-; SSE2-NEXT: pslld $23, %xmm3
-; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE2-NEXT: cvttps2dq %xmm3, %xmm3
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: pmuludq %xmm3, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
-; SSE2-NEXT: pmuludq %xmm6, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,3,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; SSE2-NEXT: por %xmm5, %xmm4
; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pmuludq %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm5, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pandn %xmm4, %xmm2
+; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
@@ -183,8 +182,8 @@ define <4 x i32> @combine_vec_rot_select_zero(<4 x i32>, <4 x i32>) {
define <4 x i32> @rotate_demanded_bits(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotate_demanded_bits:
; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -234,8 +233,8 @@ define <4 x i32> @rotate_demanded_bits(<4 x i32>, <4 x i32>) {
define <4 x i32> @rotate_demanded_bits_2(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotate_demanded_bits_2:
; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -285,9 +284,8 @@ define <4 x i32> @rotate_demanded_bits_2(<4 x i32>, <4 x i32>) {
define <4 x i32> @rotate_demanded_bits_3(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotate_demanded_bits_3:
; SSE2: # %bb.0:
-; SSE2-NEXT: paddd %xmm1, %xmm1
+; SSE2-NEXT: pslld $24, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pslld $23, %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -339,8 +337,8 @@ define <4 x i32> @rotate_demanded_bits_3(<4 x i32>, <4 x i32>) {
define <4 x i32> @rotl_binop_shuffle(<4 x i32>, <4 x i32>) {
; SSE2-LABEL: rotl_binop_shuffle:
; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index d9a6b6d9ff8a2..c8bee6bdcca52 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -169,15 +169,41 @@ define <4 x i32> @combine_vec_sdiv_dupe(<4 x i32> %x) {
define <4 x i32> @combine_vec_sdiv_by_pos0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sdiv_by_pos0:
; SSE: # %bb.0:
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: psrld $2, %xmm0
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_sdiv_by_pos0:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpsrld $2, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: combine_vec_sdiv_by_pos0:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_vec_sdiv_by_pos0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [63,63,63,63]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: combine_vec_sdiv_by_pos0:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [63,63,63,63]
+; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: combine_vec_sdiv_by_pos0:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; XOP-LABEL: combine_vec_sdiv_by_pos0:
+; XOP: # %bb.0:
+; XOP-NEXT: vpsrld $2, %xmm0, %xmm0
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
%1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
%2 = sdiv <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
ret <4 x i32> %2
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index 11159148cb335..e443e8472f31f 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -114,8 +114,8 @@ define <4 x i32> @combine_vec_shl_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE2-LABEL: combine_vec_shl_trunc_and:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
-; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -130,8 +130,8 @@ define <4 x i32> @combine_vec_shl_trunc_and(<4 x i32> %x, <4 x i64> %y) {
; SSE41-LABEL: combine_vec_shl_trunc_and:
; SSE41: # %bb.0:
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
-; SSE41-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pslld $23, %xmm1
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
; SSE41-NEXT: pmulld %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 22ba979684916..7bd2683286ab7 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -323,18 +323,15 @@ define <4 x i32> @combine_vec_lshr_ashr_sign(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @combine_vec_lshr_lzcnt_bit0(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lzcnt_bit0:
; SSE: # %bb.0:
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: psrld $4, %xmm0
-; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_lzcnt_bit0:
; AVX: # %bb.0:
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $4, %xmm0, %xmm0
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = and <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
%2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %1, i1 0)
diff --git a/llvm/test/CodeGen/X86/dpbusd_i4.ll b/llvm/test/CodeGen/X86/dpbusd_i4.ll
index 63b58cf41a52f..1c045344dcd0a 100644
--- a/llvm/test/CodeGen/X86/dpbusd_i4.ll
+++ b/llvm/test/CodeGen/X86/dpbusd_i4.ll
@@ -52,10 +52,9 @@ define i32 @mul_i4i4(<16 x i4> %a, <16 x i4> %b, i32 %c) {
; CHECK-LABEL: mul_i4i4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpsllw $4, %xmm1, %xmm1
-; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-NEXT: vpsrlw $4, %xmm1, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
; CHECK-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/funnel-shift-rot.ll b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
index 79870de6a2589..7d106fce44555 100644
--- a/llvm/test/CodeGen/X86/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
@@ -130,8 +130,8 @@ define i32 @rotl_i32(i32 %x, i32 %z) nounwind {
define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X86-SSE2-LABEL: rotl_v4i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: pslld $23, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -309,8 +309,8 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm2
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
index c1e182460c08f..220b7dc46dd94 100644
--- a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
@@ -213,8 +213,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) {
define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v8i16:
; ANY: # %bb.0:
-; ANY-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; ANY-NEXT: psllw $7, %xmm0
+; ANY-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; ANY-NEXT: retq
%shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
ret <8 x i16> %shl
@@ -223,8 +223,8 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v4i32:
; ANY: # %bb.0:
-; ANY-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; ANY-NEXT: pslld $6, %xmm0
+; ANY-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; ANY-NEXT: retq
%shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
ret <4 x i32> %shl
@@ -233,8 +233,8 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v2i64:
; ANY: # %bb.0:
-; ANY-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; ANY-NEXT: psllq $16, %xmm0
+; ANY-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; ANY-NEXT: retq
%shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
ret <2 x i64> %shl
diff --git a/llvm/test/CodeGen/X86/shrink-const.ll b/llvm/test/CodeGen/X86/shrink-const.ll
index 1f4929c517081..a071e543e0dde 100644
--- a/llvm/test/CodeGen/X86/shrink-const.ll
+++ b/llvm/test/CodeGen/X86/shrink-const.ll
@@ -7,15 +7,15 @@ define <4 x i32> @sext_vector_constants(<4 x i32> %a0) {
; SSE-LABEL: sext_vector_constants:
; SSE: # %bb.0:
; SSE-NEXT: psrld $9, %xmm0
-; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pslld $26, %xmm0
+; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sext_vector_constants:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $9, %xmm0, %xmm0
-; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpslld $26, %xmm0, %xmm0
+; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = lshr <4 x i32> %a0, <i32 9, i32 9, i32 9, i32 9>
%2 = xor <4 x i32> %1, <i32 314523200, i32 -2085372448, i32 144496960, i32 1532773600>
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index df26de9d7b2a6..8db2b7cf3a6b9 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -171,8 +171,8 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v4i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -191,8 +191,8 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE41-LABEL: var_funnnel_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pslld $23, %xmm1
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
@@ -208,8 +208,8 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
@@ -282,8 +282,8 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
;
; X86-SSE2-LABEL: var_funnnel_v4i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: pslld $23, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
index da4de476774ac..537096e48b066 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
@@ -24,8 +24,8 @@ declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -44,8 +44,8 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE41-LABEL: var_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pslld $23, %xmm1
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
@@ -61,8 +61,8 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; AVX1-LABEL: var_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
@@ -135,8 +135,8 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
;
; X86-SSE2-LABEL: var_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: pslld $23, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index fa9560059a16e..a5967ed1cca9e 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -175,8 +175,8 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: pslld $23, %xmm2
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -196,8 +196,8 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: pslld $23, %xmm2
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: cvttps2dq %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
@@ -215,8 +215,8 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
@@ -294,8 +294,8 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm2
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
index a034790707f7b..54acb196f275d 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
@@ -26,8 +26,8 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: pslld $23, %xmm2
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -47,8 +47,8 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: pslld $23, %xmm2
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: cvttps2dq %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
@@ -66,8 +66,8 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
@@ -145,8 +145,8 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm2
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 0cd027aa05154..fd165999da2cc 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -126,8 +126,8 @@ define <2 x i64> @var_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
define <4 x i32> @var_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: var_rotate_v4i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
@@ -146,8 +146,8 @@ define <4 x i32> @var_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-LABEL: var_rotate_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pslld $23, %xmm1
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
@@ -163,8 +163,8 @@ define <4 x i32> @var_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX1-LABEL: var_rotate_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
@@ -209,8 +209,8 @@ define <4 x i32> @var_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
;
; X86-SSE2-LABEL: var_rotate_v4i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: pslld $23, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 3fdafe298d740..66ca022f8de20 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -769,8 +769,8 @@ define <16 x i8> @shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: psrlq $16, %xmm0
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
index 06d642e2931ce..144a9e2e5439a 100644
--- a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
@@ -17,30 +17,30 @@
define <16 x i8> @test_128_i8_x_16_7_mask_lshr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_7_mask_lshr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_7_mask_lshr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%t1 = lshr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -50,26 +50,26 @@ define <16 x i8> @test_128_i8_x_16_7_mask_lshr_1(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_lshr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = lshr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -78,26 +78,26 @@ define <16 x i8> @test_128_i8_x_16_28_mask_lshr_1(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_lshr_2(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_2:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_2:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $2, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_2:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $2, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_2:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $2, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = lshr <16 x i8> %t0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
@@ -106,30 +106,30 @@ define <16 x i8> @test_128_i8_x_16_28_mask_lshr_2(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_lshr_3(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_3:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $3, %xmm0
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_3:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_3:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $3, %xmm0
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_3:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = lshr <16 x i8> %t0, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -138,30 +138,30 @@ define <16 x i8> @test_128_i8_x_16_28_mask_lshr_3(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_lshr_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $4, %xmm0
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_lshr_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $4, %xmm0
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_lshr_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = lshr <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -171,26 +171,26 @@ define <16 x i8> @test_128_i8_x_16_28_mask_lshr_4(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_224_mask_lshr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
%t1 = lshr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -199,26 +199,26 @@ define <16 x i8> @test_128_i8_x_16_224_mask_lshr_1(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_224_mask_lshr_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $4, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_lshr_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $4, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_lshr_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
%t1 = lshr <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -286,30 +286,30 @@ define <16 x i8> @test_128_i8_x_16_224_mask_lshr_6(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_7_mask_ashr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_ashr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_7_mask_ashr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_7_mask_ashr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%t1 = ashr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -319,26 +319,26 @@ define <16 x i8> @test_128_i8_x_16_7_mask_ashr_1(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_ashr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = ashr <16 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -347,26 +347,26 @@ define <16 x i8> @test_128_i8_x_16_28_mask_ashr_1(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_ashr_2(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_2:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_2:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $2, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_2:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $2, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_2:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $2, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = ashr <16 x i8> %t0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
@@ -375,30 +375,30 @@ define <16 x i8> @test_128_i8_x_16_28_mask_ashr_2(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_ashr_3(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_3:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $3, %xmm0
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_3:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_3:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $3, %xmm0
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_3:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = ashr <16 x i8> %t0, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -407,30 +407,30 @@ define <16 x i8> @test_128_i8_x_16_28_mask_ashr_3(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_ashr_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $4, %xmm0
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_ashr_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $4, %xmm0
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_ashr_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = ashr <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -440,8 +440,8 @@ define <16 x i8> @test_128_i8_x_16_28_mask_ashr_4(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_224_mask_ashr_1(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; X86-SSE2-NEXT: pxor %xmm1, %xmm0
; X86-SSE2-NEXT: psubb %xmm1, %xmm0
@@ -449,8 +449,8 @@ define <16 x i8> @test_128_i8_x_16_224_mask_ashr_1(<16 x i8> %a0) {
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
@@ -458,8 +458,8 @@ define <16 x i8> @test_128_i8_x_16_224_mask_ashr_1(<16 x i8> %a0) {
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; X64-SSE2-NEXT: pxor %xmm1, %xmm0
; X64-SSE2-NEXT: psubb %xmm1, %xmm0
@@ -467,8 +467,8 @@ define <16 x i8> @test_128_i8_x_16_224_mask_ashr_1(<16 x i8> %a0) {
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
@@ -480,8 +480,8 @@ define <16 x i8> @test_128_i8_x_16_224_mask_ashr_1(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_224_mask_ashr_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $4, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X86-SSE2-NEXT: pxor %xmm1, %xmm0
; X86-SSE2-NEXT: psubb %xmm1, %xmm0
@@ -489,8 +489,8 @@ define <16 x i8> @test_128_i8_x_16_224_mask_ashr_4(<16 x i8> %a0) {
;
; X86-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
@@ -498,8 +498,8 @@ define <16 x i8> @test_128_i8_x_16_224_mask_ashr_4(<16 x i8> %a0) {
;
; X64-SSE2-LABEL: test_128_i8_x_16_224_mask_ashr_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $4, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X64-SSE2-NEXT: pxor %xmm1, %xmm0
; X64-SSE2-NEXT: psubb %xmm1, %xmm0
@@ -507,8 +507,8 @@ define <16 x i8> @test_128_i8_x_16_224_mask_ashr_4(<16 x i8> %a0) {
;
; X64-AVX-LABEL: test_128_i8_x_16_224_mask_ashr_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
@@ -631,26 +631,26 @@ define <16 x i8> @test_128_i8_x_16_7_mask_shl_1(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_7_mask_shl_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $4, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_7_mask_shl_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $4, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_7_mask_shl_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $4, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_7_mask_shl_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $4, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%t1 = shl <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -744,26 +744,26 @@ define <16 x i8> @test_128_i8_x_16_28_mask_shl_1(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_shl_2(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_2:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $2, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_shl_2:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $2, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_2:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $2, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_shl_2:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $2, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = shl <16 x i8> %t0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
@@ -772,26 +772,26 @@ define <16 x i8> @test_128_i8_x_16_28_mask_shl_2(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_shl_3(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_3:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $3, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_shl_3:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $3, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_3:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $3, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_shl_3:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $3, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = shl <16 x i8> %t0, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -800,30 +800,30 @@ define <16 x i8> @test_128_i8_x_16_28_mask_shl_3(<16 x i8> %a0) {
define <16 x i8> @test_128_i8_x_16_28_mask_shl_4(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $4, %xmm0
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i8_x_16_28_mask_shl_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $4, %xmm0, %xmm0
; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i8_x_16_28_mask_shl_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $4, %xmm0
; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i8_x_16_28_mask_shl_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $4, %xmm0, %xmm0
; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <16 x i8> %a0, <i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28, i8 28>
%t1 = shl <16 x i8> %t0, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
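
The shl-by-4 case above is the interesting one: v16i8 shifts are lowered via PSLLW on i16 lanes plus a PAND that clears the bits shifted across byte boundaries, and the fold now emits a second PAND for the source mask, pre-shifted with i8 truncation (28 << 4 truncates to 192). The constant-pool operands are not printed here, but assuming the usual byte-lane fixup mask of 240 (0xF0), the two PANDs could in principle merge into one, since 240 & 192 == 192 - this is the "could/should reassociate, if only the vector constants had not already been lowered" case from the commit message. Hypothetical merged form as IR:

; a single mask of 192 would cover both the lane fixup and the folded source mask
define <16 x i8> @shl4_single_mask(<16 x i8> %x) {
  %t0 = shl <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %t1 = and <16 x i8> %t0, <i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192>
  ret <16 x i8> %t1
}
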
@@ -868,26 +868,26 @@ define <16 x i8> @test_128_i8_x_16_224_mask_shl_1(<16 x i8> %a0) {
define <8 x i16> @test_128_i16_x_8_127_mask_lshr_1(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_127_mask_lshr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_127_mask_lshr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%t1 = lshr <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
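
The i16 runs that follow are the same reassociation with the PAND sunk below PSRLW/PSRAW; the constant-pool loads hide the values, but the .LCPI entry should now hold the pre-shifted mask, e.g. 127 >> 1 == 63 for this test. Folded form as IR, function name illustrative:

define <8 x i16> @lshr_then_mask(<8 x i16> %x) {
  %t0 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %t1 = and <8 x i16> %t0, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
  ret <8 x i16> %t1
}
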
@@ -897,26 +897,26 @@ define <8 x i16> @test_128_i16_x_8_127_mask_lshr_1(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_3(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_3:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $3, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_3:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_3:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $3, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_3:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = lshr <8 x i16> %t0, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -925,26 +925,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_3(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_4(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $4, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $4, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = lshr <8 x i16> %t0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
@@ -953,26 +953,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_4(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_5(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_5:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $5, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_5:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $5, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_5:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $5, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_5:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $5, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = lshr <8 x i16> %t0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -981,26 +981,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_5(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_6(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_6:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $6, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_6:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $6, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_lshr_6:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $6, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_lshr_6:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $6, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = lshr <8 x i16> %t0, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
@@ -1010,26 +1010,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_lshr_6(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_65024_mask_lshr_1(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
%t1 = lshr <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1038,26 +1038,26 @@ define <8 x i16> @test_128_i16_x_8_65024_mask_lshr_1(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_65024_mask_lshr_8(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_8:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_8:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_lshr_8:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_lshr_8:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
%t1 = lshr <8 x i16> %t0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1117,26 +1117,26 @@ define <8 x i16> @test_128_i16_x_8_65024_mask_lshr_10(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_127_mask_ashr_1(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_ashr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_127_mask_ashr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_127_mask_ashr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%t1 = ashr <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1146,26 +1146,26 @@ define <8 x i16> @test_128_i16_x_8_127_mask_ashr_1(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_3(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_3:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $3, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_3:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_3:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $3, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_3:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = ashr <8 x i16> %t0, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1174,26 +1174,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_3(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_4(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $4, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $4, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = ashr <8 x i16> %t0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
@@ -1202,26 +1202,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_4(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_5(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_5:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $5, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_5:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $5, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_5:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $5, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_5:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $5, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = ashr <8 x i16> %t0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -1230,26 +1230,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_5(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_6(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_6:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlw $6, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_6:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $6, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_ashr_6:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlw $6, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_ashr_6:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $6, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = ashr <8 x i16> %t0, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
@@ -1259,26 +1259,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_ashr_6(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_65024_mask_ashr_1(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psraw $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsraw $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psraw $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsraw $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
%t1 = ashr <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1287,26 +1287,26 @@ define <8 x i16> @test_128_i16_x_8_65024_mask_ashr_1(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_65024_mask_ashr_8(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_8:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psraw $8, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_8:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsraw $8, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_ashr_8:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psraw $8, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_ashr_8:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsraw $8, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
%t1 = ashr <8 x i16> %t0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1394,26 +1394,26 @@ define <8 x i16> @test_128_i16_x_8_127_mask_shl_1(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_127_mask_shl_8(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_8:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $8, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_127_mask_shl_8:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $8, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_8:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $8, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_127_mask_shl_8:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $8, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%t1 = shl <8 x i16> %t0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1471,26 +1471,26 @@ define <8 x i16> @test_128_i16_x_8_127_mask_shl_10(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_shl_3(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_3:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $3, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_3:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $3, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_3:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $3, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_3:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $3, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = shl <8 x i16> %t0, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1499,26 +1499,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_shl_3(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_shl_4(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_4:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $4, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_4:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $4, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_4:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $4, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_4:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $4, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = shl <8 x i16> %t0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
@@ -1527,26 +1527,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_shl_4(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_shl_5(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_5:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $5, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_5:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $5, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_5:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $5, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_5:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $5, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = shl <8 x i16> %t0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -1555,26 +1555,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_shl_5(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_2032_mask_shl_6(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_6:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllw $6, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_6:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllw $6, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_2032_mask_shl_6:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllw $6, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_2032_mask_shl_6:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllw $6, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032, i16 2032>
%t1 = shl <8 x i16> %t0, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
@@ -1619,40 +1619,40 @@ define <8 x i16> @test_128_i16_x_8_65024_mask_shl_1(<8 x i16> %a0) {
define <4 x i32> @test_128_i32_x_4_32767_mask_lshr_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16383,16383,16383,16383]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_32767_mask_lshr_1:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16383,16383,16383,16383]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
%t1 = lshr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
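
With AVX2 the broadcast constant is printed inline, so the shifted mask is directly visible: 32767 becomes 32767 >> 1 == 16383 once the PAND moves below the shift. The equivalent IR, function name illustrative:

define <4 x i32> @lshr1_then_mask(<4 x i32> %x) {
  %t0 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %t1 = and <4 x i32> %t0, <i32 16383, i32 16383, i32 16383, i32 16383>
  ret <4 x i32> %t1
}
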
@@ -1662,40 +1662,40 @@ define <4 x i32> @test_128_i32_x_4_32767_mask_lshr_1(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_7(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $7, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $7, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $7, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65534,65534,65534,65534]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $7, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $7, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_7:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $7, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65534,65534,65534,65534]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = lshr <4 x i32> %t0, <i32 7, i32 7, i32 7, i32 7>
@@ -1704,40 +1704,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_7(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_8(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $8, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $8, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $8, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $8, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $8, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_8:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $8, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = lshr <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
@@ -1746,40 +1746,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_8(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_9(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $9, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $9, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $9, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16383,16383,16383,16383]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $9, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $9, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_9:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $9, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16383,16383,16383,16383]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = lshr <4 x i32> %t0, <i32 9, i32 9, i32 9, i32 9>
@@ -1788,40 +1788,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_9(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_10(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $10, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $10, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $10, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8191,8191,8191,8191]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $10, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $10, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_lshr_10:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $10, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8191,8191,8191,8191]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = lshr <4 x i32> %t0, <i32 10, i32 10, i32 10, i32 10>
@@ -1831,40 +1831,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_lshr_10(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_4294836224_mask_lshr_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147418112,2147418112,2147418112,2147418112]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_1:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147418112,2147418112,2147418112,2147418112]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
%t1 = lshr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
@@ -1873,40 +1873,40 @@ define <4 x i32> @test_128_i32_x_4_4294836224_mask_lshr_1(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_4294836224_mask_lshr_16(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $16, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65534,65534,65534,65534]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $16, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_lshr_16:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65534,65534,65534,65534]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
%t1 = lshr <4 x i32> %t0, <i32 16, i32 16, i32 16, i32 16>
@@ -1966,40 +1966,40 @@ define <4 x i32> @test_128_i32_x_4_4294836224_mask_lshr_18(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_32767_mask_ashr_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16383,16383,16383,16383]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_32767_mask_ashr_1:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16383,16383,16383,16383]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
%t1 = ashr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
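
In the ashr tests with mask 32767 the sign bit is known zero after the AND, so the arithmetic shift degenerates to a logical one (hence PSRLD rather than PSRAD in the checks), and the fold proceeds exactly as in the lshr case: (ashr (and X, 32767), 1) == (and (lshr X, 1), 16383). Sketch of the source pattern, function name illustrative:

define <4 x i32> @mask_then_ashr(<4 x i32> %x) {
  %t0 = and <4 x i32> %x, <i32 32767, i32 32767, i32 32767, i32 32767>
  %t1 = ashr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ; equivalent to: and (lshr %x, 1), 16383
  ret <4 x i32> %t1
}
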
@@ -2009,40 +2009,40 @@ define <4 x i32> @test_128_i32_x_4_32767_mask_ashr_1(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_7(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $7, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $7, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $7, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65534,65534,65534,65534]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $7, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $7, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_7:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $7, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65534,65534,65534,65534]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = ashr <4 x i32> %t0, <i32 7, i32 7, i32 7, i32 7>
@@ -2051,40 +2051,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_7(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_8(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $8, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $8, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $8, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $8, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $8, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_8:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $8, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = ashr <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
@@ -2093,40 +2093,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_8(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_9(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $9, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $9, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $9, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16383,16383,16383,16383]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $9, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $9, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_9:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $9, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16383,16383,16383,16383]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = ashr <4 x i32> %t0, <i32 9, i32 9, i32 9, i32 9>
@@ -2135,40 +2135,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_9(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_10(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrld $10, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrld $10, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrld $10, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8191,8191,8191,8191]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrld $10, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrld $10, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_ashr_10:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrld $10, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8191,8191,8191,8191]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = ashr <4 x i32> %t0, <i32 10, i32 10, i32 10, i32 10>
@@ -2178,41 +2178,29 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_ashr_10(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_4294836224_mask_ashr_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrad $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
-; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
-; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrad $1, %xmm0, %xmm0
-; X86-AVX1-NEXT: retl
-;
-; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
-; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrad $1, %xmm0, %xmm0
-; X86-AVX2-NEXT: retl
+; X86-AVX-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: vpsrad $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrad $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
-; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
-; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrad $1, %xmm0, %xmm0
-; X64-AVX1-NEXT: retq
-;
-; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
-; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrad $1, %xmm0, %xmm0
-; X64-AVX2-NEXT: retq
+; X64-AVX-LABEL: test_128_i32_x_4_4294836224_mask_ashr_1:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vpsrad $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; X64-AVX-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
%t1 = ashr <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %t1
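
When the mask does keep the sign bit (4294836224 == 0xFFFE0000), the shift stays arithmetic and the mask constant is shifted arithmetically as well: 0xFFFE0000 ashr 1 == 0xFFFF0000. On AVX the resulting per-dword AND with 0xFFFF0000 is then matched as a VPBLENDW against zero instead of a constant-pool PAND, as the merged X86-AVX/X64-AVX checks above show. Folded form as IR, function name illustrative:

define <4 x i32> @ashr1_then_mask(<4 x i32> %x) {
  %t0 = ashr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %t1 = and <4 x i32> %t0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  ret <4 x i32> %t1
}
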
@@ -2220,40 +2208,40 @@ define <4 x i32> @test_128_i32_x_4_4294836224_mask_ashr_1(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_4294836224_mask_ashr_16(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrad $16, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrad $16, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_ashr_16:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
%t1 = ashr <4 x i32> %t0, <i32 16, i32 16, i32 16, i32 16>
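
The ashr-by-16 variant makes the sign fill visible in the AVX2 broadcast: 0xFFFE0000 shifted arithmetically by 16 is 0xFFFFFFFE, i.e. the 4294967294 constant above. As IR, function name illustrative:

define <4 x i32> @ashr16_then_mask(<4 x i32> %x) {
  %t0 = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %t1 = and <4 x i32> %t0, <i32 4294967294, i32 4294967294, i32 4294967294, i32 4294967294>
  ret <4 x i32> %t1
}
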
@@ -2355,40 +2343,40 @@ define <4 x i32> @test_128_i32_x_4_32767_mask_shl_1(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_32767_mask_shl_16(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: pslld $16, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpslld $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpslld $16, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147418112,2147418112,2147418112,2147418112]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pslld $16, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpslld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_32767_mask_shl_16:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpslld $16, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147418112,2147418112,2147418112,2147418112]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
%t1 = shl <4 x i32> %t0, <i32 16, i32 16, i32 16, i32 16>
@@ -2446,40 +2434,40 @@ define <4 x i32> @test_128_i32_x_4_32767_mask_shl_18(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_7(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: pslld $7, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpslld $7, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpslld $7, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1073709056,1073709056,1073709056,1073709056]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pslld $7, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpslld $7, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_7:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpslld $7, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1073709056,1073709056,1073709056,1073709056]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = shl <4 x i32> %t0, <i32 7, i32 7, i32 7, i32 7>
@@ -2488,40 +2476,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_7(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_8(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: pslld $8, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpslld $8, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpslld $8, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147418112,2147418112,2147418112,2147418112]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pslld $8, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpslld $8, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_8:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpslld $8, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147418112,2147418112,2147418112,2147418112]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = shl <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
@@ -2530,40 +2518,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_8(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_9(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: pslld $9, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpslld $9, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpslld $9, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pslld $9, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpslld $9, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_9:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpslld $9, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = shl <4 x i32> %t0, <i32 9, i32 9, i32 9, i32 9>
@@ -2572,40 +2560,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_9(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_10(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: pslld $10, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpslld $10, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpslld $10, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294705152,4294705152,4294705152,4294705152]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: pslld $10, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpslld $10, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_8388352_mask_shl_10:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388352,8388352,8388352,8388352]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpslld $10, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294705152,4294705152,4294705152,4294705152]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 8388352, i32 8388352, i32 8388352, i32 8388352>
%t1 = shl <4 x i32> %t0, <i32 10, i32 10, i32 10, i32 10>
@@ -2664,26 +2652,26 @@ define <4 x i32> @test_128_i32_x_4_4294836224_mask_shl_1(<4 x i32> %a0) {
define <2 x i64> @test_128_i64_x_2_2147483647_mask_lshr_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_lshr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
%t1 = lshr <2 x i64> %t0, <i64 1, i64 1>
@@ -2693,26 +2681,26 @@ define <2 x i64> @test_128_i64_x_2_2147483647_mask_lshr_1(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_15(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $15, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $15, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $15, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_15:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $15, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = lshr <2 x i64> %t0, <i64 15, i64 15>
@@ -2721,26 +2709,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_15(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_16(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $16, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $16, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_16:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = lshr <2 x i64> %t0, <i64 16, i64 16>
@@ -2749,26 +2737,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_16(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_17(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $17, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $17, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $17, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_17:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $17, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = lshr <2 x i64> %t0, <i64 17, i64 17>
@@ -2777,26 +2765,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_17(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_18(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $18, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $18, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $18, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_lshr_18:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $18, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = lshr <2 x i64> %t0, <i64 18, i64 18>
@@ -2806,26 +2794,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_lshr_18(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_lshr_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
%t1 = lshr <2 x i64> %t0, <i64 1, i64 1>
@@ -2834,33 +2822,26 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_lshr_1(<2 x i64> %a
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_lshr_32(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $32, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
-; X86-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
-; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
-; X86-AVX1-NEXT: retl
-;
-; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
-; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
-; X86-AVX2-NEXT: retl
+; X86-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $32, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_lshr_32:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
%t1 = lshr <2 x i64> %t0, <i64 32, i64 32>
@@ -2920,26 +2901,26 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_lshr_34(<2 x i64> %
define <2 x i64> @test_128_i64_x_2_2147483647_mask_ashr_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $1, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_ashr_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
%t1 = ashr <2 x i64> %t0, <i64 1, i64 1>
@@ -2949,26 +2930,26 @@ define <2 x i64> @test_128_i64_x_2_2147483647_mask_ashr_1(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_15(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $15, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $15, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $15, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_15:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $15, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = ashr <2 x i64> %t0, <i64 15, i64 15>
@@ -2977,26 +2958,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_15(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_16(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $16, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $16, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_16:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = ashr <2 x i64> %t0, <i64 16, i64 16>
@@ -3005,26 +2986,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_16(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_17(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $17, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $17, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $17, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_17:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $17, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = ashr <2 x i64> %t0, <i64 17, i64 17>
@@ -3033,26 +3014,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_17(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_18(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psrlq $18, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlq $18, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrlq $18, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_ashr_18:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlq $18, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = ashr <2 x i64> %t0, <i64 18, i64 18>
@@ -3068,7 +3049,6 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_1(<2 x i64> %a
;
; X86-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrad $1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
@@ -3083,15 +3063,23 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_1(<2 x i64> %a
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psrad $1, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
-; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrad $1, %xmm0, %xmm0
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vpsrad $1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpsrad $1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; X64-AVX2-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
%t1 = ashr <2 x i64> %t0, <i64 1, i64 1>
ret <2 x i64> %t1
@@ -3298,33 +3286,26 @@ define <2 x i64> @test_128_i64_x_2_2147483647_mask_shl_1(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_2147483647_mask_shl_32(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllq $32, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
-; X86-AVX1-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
-; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
-; X86-AVX1-NEXT: retl
-;
-; X86-AVX2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
-; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
-; X86-AVX2-NEXT: retl
+; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllq $32, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_32:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $32, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
%t1 = shl <2 x i64> %t0, <i64 32, i64 32>
@@ -3382,26 +3363,26 @@ define <2 x i64> @test_128_i64_x_2_2147483647_mask_shl_34(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_15(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllq $15, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllq $15, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllq $15, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_15:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $15, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = shl <2 x i64> %t0, <i64 15, i64 15>
@@ -3410,26 +3391,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_15(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_16(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllq $16, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllq $16, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllq $16, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_16:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $16, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = shl <2 x i64> %t0, <i64 16, i64 16>
@@ -3438,26 +3419,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_16(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_17(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllq $17, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllq $17, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllq $17, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_17:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $17, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = shl <2 x i64> %t0, <i64 17, i64 17>
@@ -3466,26 +3447,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_17(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_18(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: psllq $18, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsllq $18, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: psllq $18, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_140737488289792_mask_shl_18:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsllq $18, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 140737488289792, i64 140737488289792>
%t1 = shl <2 x i64> %t0, <i64 18, i64 18>
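
A quick way to convince yourself the regenerated AVX2 constants are right: each new broadcast value is just the old mask run through the same shift that now precedes the AND. For example 32767 (0x00007FFF) shl 16 = 2147418112 (0x7FFF0000), 8388352 (0x007FFF00) shl 9 = 4294836224 (0xFFFE0000), and 4294836224 ashr 16 = 4294967294 (0xFFFFFFFE), matching the vpbroadcastd values above. The following standalone sketch is my own illustration, not part of the commit; it checks the bitwise identity (shift (logic X, C2), C1) == (logic (shift X, C1), (shift C2, C1)) for AND with all three shift kinds, using mask values borrowed from the tests above:

#include <cassert>
#include <cstdint>

int main() {
  // Masks taken from the test functions above (illustrative choices).
  const uint32_t ShlMask  = 8388352u;    // 0x007FFF00
  const uint32_t AshrMask = 4294836224u; // 0xFFFE0000 (sign bit set)
  const uint32_t Vals[] = {0u, 1u, 0x12345678u, 0x7FFFFFFFu,
                           0x80000000u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t V : Vals) {
    for (unsigned S = 0; S < 32; ++S) {
      // shl: mask-then-shift == shift-then-mask with the left-shifted mask.
      assert(((V & ShlMask) << S) == ((V << S) & (ShlMask << S)));
      // lshr: same, with the logically right-shifted mask.
      assert(((V & ShlMask) >> S) == ((V >> S) & (ShlMask >> S)));
      // ashr: same, with the *arithmetically* right-shifted mask. Each
      // result bit maps to the same (clamped) source bit position on both
      // sides, so AND distributes through the arithmetic shift as well.
      // (>> on int32_t is arithmetic on LLVM-supported hosts, and is
      // guaranteed arithmetic since C++20.)
      assert((static_cast<int32_t>(V & AshrMask) >> S) ==
             ((static_cast<int32_t>(V) >> S) &
              (static_cast<int32_t>(AshrMask) >> S)));
    }
  }
  return 0;
}

The ashr case also explains the new vpxor+vpblendw/vpblendd sequences above: once the mask is shifted (e.g. 0xFFFE0000 ashr 1 = 0xFFFF0000), the AND with a constant that is all-ones or all-zeros per 16- or 32-bit lane lowers to a blend with zero instead of a memory-sourced vpand.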