[llvm] f84b784 - [X86] LowerShiftByScalarImmediate - move shl(x, 1) -> add(freeze(x),freeze(x)) to X86FixupInstTunings (#161007)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 29 00:29:08 PDT 2025
Author: Simon Pilgrim
Date: 2025-09-29T07:29:04Z
New Revision: f84b784dac61d419318ed6598412d197f1e05ee9
URL: https://github.com/llvm/llvm-project/commit/f84b784dac61d419318ed6598412d197f1e05ee9
DIFF: https://github.com/llvm/llvm-project/commit/f84b784dac61d419318ed6598412d197f1e05ee9.diff
LOG: [X86] LowerShiftByScalarImmediate - move shl(x,1) -> add(freeze(x),freeze(x)) to X86FixupInstTunings (#161007)
Avoid the shl(x,1) -> add(freeze(x),freeze(x)) fold if the shift-imm is
legal, and leave it to X86FixupInstTunings.
This helps avoid missed optimisations due to one-use limits, avoids
unnecessary freezes, and allows AVX512 to fold loads via the mi
memory-folding variants of the shift instructions.
Fixes #161006
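
As a quick illustration (a hypothetical reduced example, not a test from
this patch): a vector shift-by-one such as

  ; hypothetical reduced example, not part of this patch
  define <4 x i32> @shl_by_one(<4 x i32> %x) {
    %r = shl <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>  ; shift-by-one
    ret <4 x i32> %r
  }

is now selected as a plain PSLLD/VPSLLD, and X86FixupInstTuning later
rewrites it to PADDD/VPADDD only when the scheduler model says the ADD
form is at least as fast.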
Added:
Modified:
llvm/lib/Target/X86/X86FixupInstTuning.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-add.ll
llvm/test/CodeGen/X86/combine-mul.ll
llvm/test/CodeGen/X86/combine-sdiv.ll
llvm/test/CodeGen/X86/known-signbits-shl.ll
llvm/test/CodeGen/X86/masked_gather_scatter.ll
llvm/test/CodeGen/X86/oddsubvector.ll
llvm/test/CodeGen/X86/pr62286.ll
llvm/test/CodeGen/X86/pr74736.ll
llvm/test/CodeGen/X86/shift-i512.ll
llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/vec_shift6.ll
llvm/test/CodeGen/X86/vector-gep.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
llvm/test/CodeGen/X86/vector-mul.ll
llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
llvm/test/CodeGen/X86/vector-shift-shl-128.ll
llvm/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FixupInstTuning.cpp b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
index 33dc0a232815c..a1d4e0bc62310 100644
--- a/llvm/lib/Target/X86/X86FixupInstTuning.cpp
+++ b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
@@ -277,6 +277,22 @@ bool X86FixupInstTuningPass::processInstruction(
return true;
};
+ // Is ADD(X,X) more efficient than SHL(X,1)?
+ auto ProcessShiftLeftToAdd = [&](unsigned AddOpc) -> bool {
+ if (MI.getOperand(NumOperands - 1).getImm() != 1)
+ return false;
+ if (!NewOpcPreferable(AddOpc, /*ReplaceInTie*/ true))
+ return false;
+ LLVM_DEBUG(dbgs() << "Replacing: " << MI);
+ {
+ MI.setDesc(TII->get(AddOpc));
+ MI.removeOperand(NumOperands - 1);
+ MI.addOperand(MI.getOperand(NumOperands - 2));
+ }
+ LLVM_DEBUG(dbgs() << " With: " << MI);
+ return true;
+ };
+
switch (Opc) {
case X86::BLENDPDrri:
return ProcessBLENDToMOV(X86::MOVSDrr, 0x3, 0x1);
@@ -563,6 +579,44 @@ bool X86FixupInstTuningPass::processInstruction(
return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
case X86::VUNPCKHPSZrmkz:
return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
+
+ case X86::PSLLWri:
+ return ProcessShiftLeftToAdd(X86::PADDWrr);
+ case X86::VPSLLWri:
+ return ProcessShiftLeftToAdd(X86::VPADDWrr);
+ case X86::VPSLLWYri:
+ return ProcessShiftLeftToAdd(X86::VPADDWYrr);
+ case X86::VPSLLWZ128ri:
+ return ProcessShiftLeftToAdd(X86::VPADDWZ128rr);
+ case X86::VPSLLWZ256ri:
+ return ProcessShiftLeftToAdd(X86::VPADDWZ256rr);
+ case X86::VPSLLWZri:
+ return ProcessShiftLeftToAdd(X86::VPADDWZrr);
+ case X86::PSLLDri:
+ return ProcessShiftLeftToAdd(X86::PADDDrr);
+ case X86::VPSLLDri:
+ return ProcessShiftLeftToAdd(X86::VPADDDrr);
+ case X86::VPSLLDYri:
+ return ProcessShiftLeftToAdd(X86::VPADDDYrr);
+ case X86::VPSLLDZ128ri:
+ return ProcessShiftLeftToAdd(X86::VPADDDZ128rr);
+ case X86::VPSLLDZ256ri:
+ return ProcessShiftLeftToAdd(X86::VPADDDZ256rr);
+ case X86::VPSLLDZri:
+ return ProcessShiftLeftToAdd(X86::VPADDDZrr);
+ case X86::PSLLQri:
+ return ProcessShiftLeftToAdd(X86::PADDQrr);
+ case X86::VPSLLQri:
+ return ProcessShiftLeftToAdd(X86::VPADDQrr);
+ case X86::VPSLLQYri:
+ return ProcessShiftLeftToAdd(X86::VPADDQYrr);
+ case X86::VPSLLQZ128ri:
+ return ProcessShiftLeftToAdd(X86::VPADDQZ128rr);
+ case X86::VPSLLQZ256ri:
+ return ProcessShiftLeftToAdd(X86::VPADDQZ256rr);
+ case X86::VPSLLQZri:
+ return ProcessShiftLeftToAdd(X86::VPADDQZrr);
+
default:
return false;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index efeddd7c9bd4b..fcfeb661aa891 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30313,22 +30313,8 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
- if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) {
- // Hardware support for vector shifts is sparse which makes us scalarize the
- // vector operations in many cases. Also, on sandybridge ADD is faster than
- // shl: (shl V, 1) -> (add (freeze V), (freeze V))
- if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) {
- // R may be undef at run-time, but (shl R, 1) must be an even number (LSB
- // must be 0). (add undef, undef) however can be any value. To make this
- // safe, we must freeze R to ensure that register allocation uses the same
- // register for an undefined value. This ensures that the result will
- // still be even and preserves the original semantics.
- R = DAG.getFreeze(R);
- return DAG.getNode(ISD::ADD, dl, VT, R, R);
- }
-
+ if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
- }
// i64 SRA needs to be performed as partial shifts.
if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
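
For context, the freeze in the removed DAG fold guarded against undef
inputs, roughly (a minimal sketch, not from the patch):

  ; (shl %x, 1) always has a zero low bit, even when %x is undef,
  ; whereas (add %a, %b) with two independent undef operands does not,
  ; so the DAG fold had to freeze %x before duplicating it.
  define <4 x i32> @even_result(<4 x i32> %x) {
    %r = shl <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
    ret <4 x i32> %r
  }

Rewriting SHL to ADD on MachineInstrs sidesteps this entirely: by that
point both ADD operands are the same register, so the result stays even.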
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index ff9f995c4765b..51a8bf5b48415 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -235,10 +235,10 @@ define void @PR52039(ptr %pa, ptr %pb) {
; SSE-NEXT: psubd %xmm1, %xmm3
; SSE-NEXT: psubd %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: paddd %xmm0, %xmm0
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm1
-; SSE-NEXT: paddd %xmm3, %xmm1
+; SSE-NEXT: paddd %xmm1, %xmm1
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: movdqu %xmm3, 16(%rsi)
; SSE-NEXT: movdqu %xmm2, (%rsi)
diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll
index 8e4a50ea266c3..ae4d24f91ffc0 100644
--- a/llvm/test/CodeGen/X86/combine-mul.ll
+++ b/llvm/test/CodeGen/X86/combine-mul.ll
@@ -81,7 +81,7 @@ define <4 x i64> @combine_vec_mul_pow2c(<4 x i64> %x) {
; SSE-LABEL: combine_vec_mul_pow2c:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: paddq %xmm0, %xmm2
+; SSE-NEXT: paddq %xmm2, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psllq $4, %xmm2
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 98187d61c1f84..6bcbfe1808933 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -2187,13 +2187,13 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pcmpgtb %xmm1, %xmm3
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,2,2,2,2,128,2,128]
; SSE41-NEXT: psrlw $8, %xmm3
-; SSE41-NEXT: paddw %xmm4, %xmm4
-; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2],xmm2[3,4,5],xmm4[6],xmm2[7]
+; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; SSE41-NEXT: paddw %xmm2, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2],xmm0[3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: paddb %xmm1, %xmm2
@@ -2201,15 +2201,14 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; SSE41-NEXT: psraw $8, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: paddw %xmm0, %xmm3
-; SSE41-NEXT: psllw $7, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm3[5],xmm0[6],xmm3[7]
-; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: psllw $7, %xmm3
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4],xmm0[5],xmm3[6],xmm0[7]
+; SSE41-NEXT: psrlw $8, %xmm3
; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: psraw $8, %xmm2
; SSE41-NEXT: psllw $7, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: packuswb %xmm0, %xmm2
+; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,0,0,255,0,0,255,255,255,255,255,255,255]
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [255,255,0,255,255,255,0,255,255,0,0,0,0,255,0,255]
@@ -2225,18 +2224,17 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,2,2,2,2,128,2,128]
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpmovsxbw %xmm0, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vpmovsxbw %xmm0, %xmm2
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3,4,5],xmm3[6],xmm2[7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpsraw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
-; AVX1-NEXT: vpsllw $7, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4],xmm3[5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vpsllw $7, %xmm2, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3,4],xmm2[5],xmm3[6],xmm2[7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpsraw $8, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/known-signbits-shl.ll b/llvm/test/CodeGen/X86/known-signbits-shl.ll
index 473fecc307ed4..57d557dec11b9 100644
--- a/llvm/test/CodeGen/X86/known-signbits-shl.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-shl.ll
@@ -137,7 +137,7 @@ define void @computeNumSignBits_shl_zext_vec_3(<2 x i8> %x, ptr %p) nounwind {
; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT: por %xmm2, %xmm1
; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: paddw %xmm0, %xmm2
+; X64-NEXT: paddw %xmm2, %xmm2
; X64-NEXT: movdqa %xmm2, %xmm3
; X64-NEXT: psraw $1, %xmm3
; X64-NEXT: pcmpeqw %xmm0, %xmm3
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 4e6f666fa05de..4cde581c10508 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4806,9 +4806,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-KNL-NEXT: vpslld $1, (%rsi), %zmm0
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
; X64-KNL-NEXT: retq
@@ -4830,9 +4829,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-SMALL-NEXT: vpslld $1, (%rsi), %zmm0
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-SMALL-NEXT: retq
@@ -4842,10 +4840,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-LARGE-NEXT: vpslld $1, (%rsi), %zmm0
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-LARGE-NEXT: retq
@@ -4875,9 +4872,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-KNL-NEXT: vpslld $1, (%rsi), %zmm0
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
; X64-KNL-NEXT: retq
@@ -4899,9 +4895,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-SMALL-NEXT: vpslld $1, (%rsi), %zmm0
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-SMALL-NEXT: retq
@@ -4911,10 +4906,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-LARGE-NEXT: vpslld $1, (%rsi), %zmm0
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-LARGE-NEXT: retq
@@ -4944,9 +4938,8 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
-; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X64-KNL-NEXT: vpslld $1, (%rsi), %zmm0
+; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm2
; X64-KNL-NEXT: kmovw %k1, %k2
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
@@ -4972,9 +4965,8 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
-; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X64-SKX-SMALL-NEXT: vpslld $1, (%rsi), %zmm0
+; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm2
; X64-SKX-SMALL-NEXT: kmovw %k1, %k2
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
@@ -4986,10 +4978,9 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
+; X64-SKX-LARGE-NEXT: vpslld $1, (%rsi), %zmm0
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
-; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm2
; X64-SKX-LARGE-NEXT: kmovw %k1, %k2
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index f53983036a016..5df1867f73c8e 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -155,10 +155,10 @@ define <16 x i32> @PR42819(ptr %a0) {
define void @PR42833() {
; SSE2-LABEL: PR42833:
; SSE2: # %bb.0:
+; SSE2-NEXT: movl b(%rip), %eax
; SSE2-NEXT: movdqa c+144(%rip), %xmm2
; SSE2-NEXT: movdqa c+128(%rip), %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: addl b(%rip), %eax
+; SSE2-NEXT: addl c+128(%rip), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: paddd %xmm0, %xmm3
@@ -166,7 +166,7 @@ define void @PR42833() {
; SSE2-NEXT: psubd %xmm2, %xmm4
; SSE2-NEXT: paddd %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm5
-; SSE2-NEXT: paddd %xmm0, %xmm5
+; SSE2-NEXT: paddd %xmm5, %xmm5
; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3]
; SSE2-NEXT: movdqa %xmm2, c+144(%rip)
; SSE2-NEXT: movaps %xmm5, c+128(%rip)
@@ -191,17 +191,17 @@ define void @PR42833() {
;
; SSE42-LABEL: PR42833:
; SSE42: # %bb.0:
+; SSE42-NEXT: movl b(%rip), %eax
; SSE42-NEXT: movdqa c+144(%rip), %xmm1
; SSE42-NEXT: movdqa c+128(%rip), %xmm0
-; SSE42-NEXT: movd %xmm0, %eax
-; SSE42-NEXT: addl b(%rip), %eax
+; SSE42-NEXT: addl c+128(%rip), %eax
; SSE42-NEXT: movd %eax, %xmm2
; SSE42-NEXT: paddd %xmm0, %xmm2
; SSE42-NEXT: movdqa d+144(%rip), %xmm3
; SSE42-NEXT: psubd %xmm1, %xmm3
; SSE42-NEXT: paddd %xmm1, %xmm1
; SSE42-NEXT: movdqa %xmm0, %xmm4
-; SSE42-NEXT: paddd %xmm0, %xmm4
+; SSE42-NEXT: paddd %xmm4, %xmm4
; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, c+144(%rip)
; SSE42-NEXT: movdqa %xmm4, c+128(%rip)
diff --git a/llvm/test/CodeGen/X86/pr62286.ll b/llvm/test/CodeGen/X86/pr62286.ll
index ce03f8fad4a19..161e9651a9cf2 100644
--- a/llvm/test/CodeGen/X86/pr62286.ll
+++ b/llvm/test/CodeGen/X86/pr62286.ll
@@ -26,27 +26,33 @@ define i64 @PR62286(i32 %a) {
; AVX1-LABEL: PR62286:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
-; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6,7]
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
-; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR62286:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
-; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm1
-; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
+; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
@@ -59,12 +65,12 @@ define i64 @PR62286(i32 %a) {
; AVX512-LABEL: PR62286:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %edi, %xmm0
-; AVX512-NEXT: movb $8, %al
+; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
+; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm1
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: movw $4369, %ax # imm = 0x1111
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpexpandd %ymm0, %ymm1 {%k1} {z}
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/pr74736.ll b/llvm/test/CodeGen/X86/pr74736.ll
index ceccee00c9457..58955265580bd 100644
--- a/llvm/test/CodeGen/X86/pr74736.ll
+++ b/llvm/test/CodeGen/X86/pr74736.ll
@@ -6,8 +6,8 @@ define void @main(<16 x i32> %0, i32 %1) {
; SSE-LABEL: main:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movd %edi, %xmm4
-; SSE-NEXT: movss {{.*#+}} xmm0 = [1,0,0,0]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm4[1,0]
+; SSE-NEXT: movsd {{.*#+}} xmm0 = [0,1,0,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,0]
; SSE-NEXT: paddd %xmm0, %xmm0
; SSE-NEXT: paddd %xmm1, %xmm1
; SSE-NEXT: paddd %xmm3, %xmm3
@@ -32,20 +32,20 @@ define void @main(<16 x i32> %0, i32 %1) {
; AVX-LABEL: main:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
; AVX-NEXT: movl $1, %eax
; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT: vpinsrd $3, %edi, %xmm2, %xmm2
-; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; AVX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
-; AVX-NEXT: vpaddd %ymm1, %ymm1, %ymm1
-; AVX-NEXT: vpmovsxbd {{.*#+}} ymm2 = [0,1,1,3,3,5,5,7]
-; AVX-NEXT: vpermd %ymm0, %ymm2, %ymm2
+; AVX-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX-NEXT: vpaddd %ymm2, %ymm2, %ymm2
+; AVX-NEXT: vpaddd %ymm1, %ymm1, %ymm3
; AVX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7]
-; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,1,3,4,5,5,7]
+; AVX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm3[0,1,1,3,4,5,5,7]
; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
-; AVX-NEXT: vpxor %ymm0, %ymm2, %ymm0
+; AVX-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,1,1,3,3,5,5,7]
+; AVX-NEXT: vpermd %ymm2, %ymm1, %ymm1
+; AVX-NEXT: vpxor %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
diff --git a/llvm/test/CodeGen/X86/shift-i512.ll b/llvm/test/CodeGen/X86/shift-i512.ll
index 756019d0e98a0..03b61d9235254 100644
--- a/llvm/test/CodeGen/X86/shift-i512.ll
+++ b/llvm/test/CodeGen/X86/shift-i512.ll
@@ -10,7 +10,7 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm0[3,4,5,6,7,0,1,2]
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm3
+; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm3
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,2,3]
; AVX512VL-NEXT: vpsrlq $63, %xmm4, %xmm4
; AVX512VL-NEXT: vpaddq %xmm2, %xmm2, %xmm2
@@ -34,7 +34,7 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) {
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; AVX512VBMI-NEXT: vpshldq $1, %xmm3, %xmm2, %xmm3
-; AVX512VBMI-NEXT: vpsllq $1, %xmm0, %xmm4
+; AVX512VBMI-NEXT: vpaddq %xmm0, %xmm0, %xmm4
; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512VBMI-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
@@ -51,7 +51,7 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) {
; ZNVER4-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; ZNVER4-NEXT: vextracti128 $1, %ymm0, %xmm2
; ZNVER4-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
-; ZNVER4-NEXT: vpsllq $1, %xmm0, %xmm4
+; ZNVER4-NEXT: vpaddq %xmm0, %xmm0, %xmm4
; ZNVER4-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; ZNVER4-NEXT: vpshldq $1, %xmm3, %xmm2, %xmm3
; ZNVER4-NEXT: vextracti64x4 $1, %zmm0, %ymm2
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 3f48b22e2b9ff..a48be037ebebc 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -5791,20 +5791,20 @@ declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi16:
; SSE: # %bb.0:
-; SSE-NEXT: psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01]
+; SSE-NEXT: psllw $2, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01]
+; AVX1-NEXT: vpsllw $2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01]
+; AVX512-NEXT: vpsllw $2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x02]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
- %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
+ %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 2)
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
@@ -5813,20 +5813,20 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi32:
; SSE: # %bb.0:
-; SSE-NEXT: pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01]
+; SSE-NEXT: pslld $2, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01]
+; AVX1-NEXT: vpslld $2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01]
+; AVX512-NEXT: vpslld $2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x02]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
- %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
+ %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 2)
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
@@ -5835,19 +5835,19 @@ declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_slli_epi64:
; SSE: # %bb.0:
-; SSE-NEXT: psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01]
+; SSE-NEXT: psllq $2, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_slli_epi64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01]
+; AVX1-NEXT: vpsllq $2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_slli_epi64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01]
+; AVX512-NEXT: vpsllq $2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x02]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
+ %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 2)
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/vec_shift6.ll b/llvm/test/CodeGen/X86/vec_shift6.ll
index 71e659c681d17..219e32c86c848 100644
--- a/llvm/test/CodeGen/X86/vec_shift6.ll
+++ b/llvm/test/CodeGen/X86/vec_shift6.ll
@@ -28,14 +28,14 @@ define <8 x i16> @test2(<8 x i16> %a) {
; SSE2-LABEL: test2:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: paddw %xmm0, %xmm1
+; SSE2-NEXT: paddw %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test2:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: paddw %xmm0, %xmm1
+; SSE41-NEXT: paddw %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
@@ -56,7 +56,7 @@ define <4 x i32> @test3(<4 x i32> %a) {
; SSE2-LABEL: test3:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: pslld $2, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
@@ -81,14 +81,14 @@ define <4 x i32> @test4(<4 x i32> %a) {
; SSE2-LABEL: test4:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test4:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: paddd %xmm0, %xmm1
+; SSE41-NEXT: paddd %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-gep.ll b/llvm/test/CodeGen/X86/vector-gep.ll
index 5c485592295d3..b4cffcd171b33 100644
--- a/llvm/test/CodeGen/X86/vector-gep.ll
+++ b/llvm/test/CodeGen/X86/vector-gep.ll
@@ -122,91 +122,87 @@ define <64 x ptr> @AGEP9(ptr %param, <64 x i32> %off) nounwind {
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-32, %esp
; CHECK-NEXT: subl $160, %esp
-; CHECK-NEXT: vmovdqa %ymm2, %ymm5
-; CHECK-NEXT: vmovdqa %ymm1, %ymm3
-; CHECK-NEXT: vmovdqa %ymm0, %ymm1
-; CHECK-NEXT: vmovdqa 72(%ebp), %ymm0
-; CHECK-NEXT: vmovdqa 40(%ebp), %ymm2
-; CHECK-NEXT: vpaddd %xmm2, %xmm2, %xmm4
-; CHECK-NEXT: vbroadcastss 12(%ebp), %xmm7
-; CHECK-NEXT: vpaddd %xmm4, %xmm7, %xmm4
-; CHECK-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm2
-; CHECK-NEXT: vpaddd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpaddd %xmm2, %xmm7, %xmm2
-; CHECK-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm2
-; CHECK-NEXT: vpaddd %xmm2, %xmm7, %xmm2
-; CHECK-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm3
+; CHECK-NEXT: vbroadcastss 12(%ebp), %xmm5
+; CHECK-NEXT: vpaddd %xmm3, %xmm5, %xmm3
+; CHECK-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm0, %xmm7, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovdqa 104(%ebp), %ymm0
-; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm2
-; CHECK-NEXT: vpaddd %xmm2, %xmm7, %xmm2
-; CHECK-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm0, %xmm7, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovdqa 136(%ebp), %ymm0
-; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm2
-; CHECK-NEXT: vpaddd %xmm2, %xmm7, %xmm2
-; CHECK-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm2, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm0, %xmm7, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT: vmovdqa 168(%ebp), %ymm0
-; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm2
-; CHECK-NEXT: vpaddd %xmm2, %xmm7, %xmm2
-; CHECK-NEXT: vmovdqa %xmm2, (%esp) # 16-byte Spill
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovdqa 40(%ebp), %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vpaddd %xmm0, %xmm7, %xmm2
-; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm0
-; CHECK-NEXT: vpaddd %xmm0, %xmm7, %xmm0
-; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm7, %xmm1
-; CHECK-NEXT: vpaddd %xmm3, %xmm3, %xmm6
-; CHECK-NEXT: vpaddd %xmm6, %xmm7, %xmm6
-; CHECK-NEXT: vextractf128 $1, %ymm3, %xmm3
-; CHECK-NEXT: vpaddd %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpaddd %xmm3, %xmm7, %xmm3
-; CHECK-NEXT: vmovdqa %ymm5, %ymm4
-; CHECK-NEXT: vpaddd %xmm4, %xmm4, %xmm5
-; CHECK-NEXT: vpaddd %xmm5, %xmm7, %xmm5
-; CHECK-NEXT: vextractf128 $1, %ymm4, %xmm4
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqa 56(%ebp), %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovdqa 72(%ebp), %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, (%esp) # 16-byte Spill
+; CHECK-NEXT: vmovdqa 88(%ebp), %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm2
+; CHECK-NEXT: vmovdqa 104(%ebp), %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm1
+; CHECK-NEXT: vmovdqa 120(%ebp), %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT: vmovdqa 136(%ebp), %xmm6
+; CHECK-NEXT: vpaddd %xmm6, %xmm6, %xmm6
+; CHECK-NEXT: vpaddd %xmm6, %xmm5, %xmm6
+; CHECK-NEXT: vmovdqa 152(%ebp), %xmm7
+; CHECK-NEXT: vpaddd %xmm7, %xmm7, %xmm7
+; CHECK-NEXT: vpaddd %xmm7, %xmm5, %xmm7
+; CHECK-NEXT: vmovdqa 168(%ebp), %xmm4
; CHECK-NEXT: vpaddd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vpaddd %xmm4, %xmm7, %xmm4
+; CHECK-NEXT: vpaddd %xmm4, %xmm5, %xmm4
+; CHECK-NEXT: vmovdqa 184(%ebp), %xmm3
+; CHECK-NEXT: vpaddd %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vpaddd %xmm3, %xmm5, %xmm3
; CHECK-NEXT: movl 8(%ebp), %eax
-; CHECK-NEXT: vmovdqa %xmm4, 80(%eax)
-; CHECK-NEXT: vmovdqa %xmm5, 64(%eax)
-; CHECK-NEXT: vmovdqa %xmm3, 48(%eax)
-; CHECK-NEXT: vmovdqa %xmm6, 32(%eax)
-; CHECK-NEXT: vmovdqa %xmm1, 16(%eax)
-; CHECK-NEXT: vmovdqa %xmm0, (%eax)
-; CHECK-NEXT: vmovdqa %xmm2, 240(%eax)
+; CHECK-NEXT: vmovdqa %xmm3, 240(%eax)
+; CHECK-NEXT: vmovdqa %xmm4, 224(%eax)
+; CHECK-NEXT: vmovdqa %xmm7, 208(%eax)
+; CHECK-NEXT: vmovdqa %xmm6, 192(%eax)
+; CHECK-NEXT: vmovdqa %xmm0, 176(%eax)
+; CHECK-NEXT: vmovdqa %xmm1, 160(%eax)
+; CHECK-NEXT: vmovdqa %xmm2, 144(%eax)
; CHECK-NEXT: vmovaps (%esp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 224(%eax)
+; CHECK-NEXT: vmovaps %xmm0, 128(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 208(%eax)
+; CHECK-NEXT: vmovaps %xmm0, 112(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 192(%eax)
+; CHECK-NEXT: vmovaps %xmm0, 96(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 176(%eax)
+; CHECK-NEXT: vmovaps %xmm0, 80(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 160(%eax)
+; CHECK-NEXT: vmovaps %xmm0, 64(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 144(%eax)
+; CHECK-NEXT: vmovaps %xmm0, 48(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 128(%eax)
+; CHECK-NEXT: vmovaps %xmm0, 32(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 112(%eax)
+; CHECK-NEXT: vmovaps %xmm0, 16(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps %xmm0, 96(%eax)
+; CHECK-NEXT: vmovaps %xmm0, (%eax)
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
index 13f7d68ccb893..33d80f63dbcc8 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -652,7 +652,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddb %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psllw $1, %xmm2
+; SSE2-NEXT: paddw %xmm2, %xmm2
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: psrlw $2, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -678,7 +678,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: paddb %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
-; SSE41-NEXT: psllw $1, %xmm2
+; SSE41-NEXT: paddw %xmm2, %xmm2
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: psrlw $2, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -701,7 +701,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpsllw $1, %xmm1, %xmm2
+; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
@@ -720,7 +720,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; AVX2NOBW-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX2NOBW-NEXT: vpsllw $1, %xmm1, %xmm2
+; AVX2NOBW-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2NOBW-NEXT: vpsrlw $2, %xmm1, %xmm1
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
@@ -739,7 +739,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512BW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX512BW-NEXT: vpsllw $1, %xmm1, %xmm2
+; AVX512BW-NEXT: vpaddw %xmm1, %xmm1, %xmm2
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512BW-NEXT: vpsrlw $2, %xmm1, %xmm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
index 1a5c3730c1839..e43108fe7d784 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -590,7 +590,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm5
; AVX1-NEXT: vpaddb %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vpsllw $1, %xmm3, %xmm5
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm5
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm7 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5
; AVX1-NEXT: vpsrlw $2, %xmm3, %xmm3
@@ -609,7 +609,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm3
; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllw $1, %xmm2, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm2, %xmm8, %xmm2
@@ -633,7 +633,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX2NOBW-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2NOBW-NEXT: vpaddb %ymm1, %ymm2, %ymm1
-; AVX2NOBW-NEXT: vpsllw $1, %ymm1, %ymm2
+; AVX2NOBW-NEXT: vpaddw %ymm1, %ymm1, %ymm2
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2NOBW-NEXT: vpsrlw $2, %ymm1, %ymm1
; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
@@ -651,7 +651,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX512BW-NEXT: vpaddb %ymm1, %ymm2, %ymm1
-; AVX512BW-NEXT: vpsllw $1, %ymm1, %ymm2
+; AVX512BW-NEXT: vpaddw %ymm1, %ymm1, %ymm2
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX512BW-NEXT: vpsrlw $2, %ymm1, %ymm1
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
index 9c56894f0c59c..bf98bcca59c04 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -485,7 +485,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5
; AVX512F-NEXT: vpaddb %ymm3, %ymm5, %ymm3
-; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm5
+; AVX512F-NEXT: vpaddw %ymm3, %ymm3, %ymm5
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm7 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX512F-NEXT: vpand %ymm7, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm3
@@ -504,7 +504,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpaddb %ymm2, %ymm3, %ymm2
-; AVX512F-NEXT: vpsllw $1, %ymm2, %ymm3
+; AVX512F-NEXT: vpaddw %ymm2, %ymm2, %ymm3
; AVX512F-NEXT: vpand %ymm7, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
; AVX512F-NEXT: vpand %ymm2, %ymm8, %ymm2
@@ -528,7 +528,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $1, %zmm2, %zmm2
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
-; AVX512BW-NEXT: vpsllw $1, %zmm1, %zmm2
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm2
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm1
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 13b21a747878b..6e1bf25908302 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -821,10 +821,10 @@ define <16 x i16> @madd_v16i16_3(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movdqa %xmm1, %xmm3
-; X86-SSE-NEXT: paddw %xmm1, %xmm3
+; X86-SSE-NEXT: paddw %xmm3, %xmm3
; X86-SSE-NEXT: paddw %xmm3, %xmm1
; X86-SSE-NEXT: movdqa %xmm0, %xmm3
-; X86-SSE-NEXT: paddw %xmm0, %xmm3
+; X86-SSE-NEXT: paddw %xmm3, %xmm3
; X86-SSE-NEXT: paddw %xmm2, %xmm0
; X86-SSE-NEXT: paddw %xmm3, %xmm0
; X86-SSE-NEXT: paddw 8(%ebp), %xmm1
@@ -835,9 +835,9 @@ define <16 x i16> @madd_v16i16_3(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; X64-SSE-LABEL: madd_v16i16_3:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movdqa %xmm1, %xmm4
-; X64-SSE-NEXT: paddw %xmm1, %xmm4
+; X64-SSE-NEXT: paddw %xmm4, %xmm4
; X64-SSE-NEXT: movdqa %xmm0, %xmm5
-; X64-SSE-NEXT: paddw %xmm0, %xmm5
+; X64-SSE-NEXT: paddw %xmm5, %xmm5
; X64-SSE-NEXT: paddw %xmm2, %xmm0
; X64-SSE-NEXT: paddw %xmm5, %xmm0
; X64-SSE-NEXT: paddw %xmm3, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
index 227e000c6be7f..ab1feba98b008 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
@@ -907,7 +907,7 @@ define i1 @mask_v8i32_2(<8 x i32> %a0) {
; SSE2-LABEL: mask_v8i32_2:
; SSE2: # %bb.0:
; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pslld $1, %xmm0
+; SSE2-NEXT: paddd %xmm0, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: testl %eax, %eax
; SSE2-NEXT: sete %al
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index 2b1cf5b671e53..99dac74d8127b 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -927,7 +927,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: constant_shift_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: paddq %xmm0, %xmm1
+; SSE2-NEXT: paddq %xmm1, %xmm1
; SSE2-NEXT: psllq $7, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
@@ -975,7 +975,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; X86-SSE-LABEL: constant_shift_v2i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE-NEXT: paddq %xmm0, %xmm1
+; X86-SSE-NEXT: paddq %xmm1, %xmm1
; X86-SSE-NEXT: psllq $7, %xmm0
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 5b61de5a3b772..ee9d8a55aeb3e 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3550,14 +3550,14 @@ define <8 x i16> @PR141475(i32 %in) {
; SSE-LABEL: PR141475:
; SSE: # %bb.0:
; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pslld $1, %xmm0
+; SSE-NEXT: paddd %xmm0, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: PR141475:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslld $1, %xmm0, %xmm0
+; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: retq
%mul = shl i32 %in, 1
diff --git a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
index 54dc107fd0c10..3b93734c24deb 100644
--- a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
@@ -1438,26 +1438,26 @@ define <8 x i16> @test_128_i16_x_8_65024_mask_ashr_10(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_127_mask_shl_1(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: paddw %xmm0, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_127_mask_shl_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_127_mask_shl_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: paddw %xmm0, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_127_mask_shl_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%t1 = shl <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1656,26 +1656,26 @@ define <8 x i16> @test_128_i16_x_8_2032_mask_shl_6(<8 x i16> %a0) {
define <8 x i16> @test_128_i16_x_8_65024_mask_shl_1(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_128_i16_x_8_65024_mask_shl_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: paddw %xmm0, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i16_x_8_65024_mask_shl_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i16_x_8_65024_mask_shl_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: paddw %xmm0, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i16_x_8_65024_mask_shl_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <8 x i16> %a0, <i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024, i16 65024>
%t1 = shl <8 x i16> %t0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -2373,40 +2373,40 @@ define <4 x i32> @test_128_i32_x_4_4294836224_mask_ashr_18(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_32767_mask_shl_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: paddd %xmm0, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65534,65534,65534,65534]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: paddd %xmm0, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_32767_mask_shl_1:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65534,65534,65534,65534]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
%t1 = shl <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
@@ -2675,40 +2675,40 @@ define <4 x i32> @test_128_i32_x_4_8388352_mask_shl_10(<4 x i32> %a0) {
define <4 x i32> @test_128_i32_x_4_4294836224_mask_shl_1(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: paddd %xmm0, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294705152,4294705152,4294705152,4294705152]
+; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: paddd %xmm0, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX1-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_128_i32_x_4_4294836224_mask_shl_1:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294836224,4294836224,4294836224,4294836224]
-; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294705152,4294705152,4294705152,4294705152]
+; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
%t0 = and <4 x i32> %a0, <i32 4294836224, i32 4294836224, i32 4294836224, i32 4294836224>
%t1 = shl <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
@@ -3325,26 +3325,26 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_34(<2 x i64> %
define <2 x i64> @test_128_i64_x_2_2147483647_mask_shl_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: paddq %xmm0, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: paddq %xmm0, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_2147483647_mask_shl_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 2147483647, i64 2147483647>
%t1 = shl <2 x i64> %t0, <i64 1, i64 1>
@@ -3543,26 +3543,26 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_shl_18(<2 x i64> %a0) {
define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_shl_1(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: paddq %xmm0, %xmm0
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; X86-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: paddq %xmm0, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
; X64-AVX-LABEL: test_128_i64_x_2_18446744065119617024_mask_shl_1:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
+; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%t0 = and <2 x i64> %a0, <i64 18446744065119617024, i64 18446744065119617024>
%t1 = shl <2 x i64> %t0, <i64 1, i64 1>