[llvm] 82a5c84 - [X86][AVX512] Fold concat(and(x,y),and(z,w)) -> and(concat(x,z),concat(y,w)) for 512-bit vectors
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 1 12:34:57 PDT 2020
Author: Simon Pilgrim
Date: 2020-08-01T20:34:39+01:00
New Revision: 82a5c848e7f531ee636f643450072059397ac90c
URL: https://github.com/llvm/llvm-project/commit/82a5c848e7f531ee636f643450072059397ac90c
DIFF: https://github.com/llvm/llvm-project/commit/82a5c848e7f531ee636f643450072059397ac90c.diff
LOG: [X86][AVX512] Fold concat(and(x,y),and(z,w)) -> and(concat(x,z),concat(y,w)) for 512-bit vectors
Helps vpternlog folding on non-AVX512BW targets, where vXi8/vXi16 ops are split into 256-bit halves and the accompanying bitwise logic would otherwise stay split with them.
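As an illustration, the identity the combine relies on can be checked with plain integers. This is a minimal standalone sketch (not part of the patch), using 32-bit halves packed into a 64-bit value in place of real vector registers:

  #include <cassert>
  #include <cstdint>

  // Treat a 64-bit value as the concatenation of two 32-bit halves, with the
  // first operand in the low half (mirroring CONCAT_VECTORS operand order).
  static uint64_t concat(uint32_t Lo, uint32_t Hi) {
    return uint64_t(Lo) | (uint64_t(Hi) << 32);
  }

  int main() {
    uint32_t X = 0x12345678, Y = 0x0F0F0F0F, Z = 0xCAFEBABE, W = 0xFF00FF00;
    // concat(and(x,y),and(z,w)) == and(concat(x,z),concat(y,w))
    assert(concat(X & Y, Z & W) == (concat(X, Z) & concat(Y, W)));
    return 0;
  }

The same identity holds element-wise for OR, XOR and ANDNP, which is why the new case below covers all four opcodes.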
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512-insert-extract.ll
llvm/test/CodeGen/X86/vector-bitreverse.ll
llvm/test/CodeGen/X86/vector-fshl-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
llvm/test/CodeGen/X86/vector-rotate-512.ll
llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
llvm/test/CodeGen/X86/vector-shift-shl-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b89502dc7020..c135b9162072 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48090,6 +48090,25 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
Op0.getOperand(1));
}
break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case X86ISD::ANDNP:
+ // TODO: Add 256-bit support.
+ if (!IsSplat && VT.is512BitVector()) {
+ SmallVector<SDValue, 2> LHS, RHS;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ LHS.push_back(Ops[i].getOperand(0));
+ RHS.push_back(Ops[i].getOperand(1));
+ }
+ MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
+ SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ NumOps * SrcVT.getVectorNumElements());
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, LHS),
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, RHS));
+ }
+ break;
case X86ISD::PACKSS:
case X86ISD::PACKUS:
if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index f6ffd6419c13..41bdaf21baa3 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1692,16 +1692,15 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm2
-; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
-; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: andl $63, %esi
; KNL-NEXT: testb %dil, %dil
-; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vmovdqa %ymm2, (%rsp)
+; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -1772,116 +1771,115 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: subq $192, %rsp
; KNL-NEXT: movl 744(%rbp), %eax
; KNL-NEXT: andl $127, %eax
-; KNL-NEXT: vmovd %edi, %xmm0
-; KNL-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0
; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm1, %xmm1
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
+; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm1
+; KNL-NEXT: vmovd %edi, %xmm2
+; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm2, %xmm2
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm2
-; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
-; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm3, %xmm3
-; KNL-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm4, %xmm4
-; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm3, %ymm1
+; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm2
+; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm2, %xmm2
+; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm3, %xmm3
+; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
+; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: cmpb $0, 736(%rbp)
-; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vmovdqa %ymm0, (%rsp)
+; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
+; KNL-NEXT: vmovdqa64 %zmm1, (%rsp)
; KNL-NEXT: setne (%rsp,%rax)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -2079,23 +2077,21 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $192, %rsp
; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm3
-; KNL-NEXT: vpternlogq $15, %zmm3, %zmm3, %zmm3
-; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
+; KNL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
+; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm4
-; KNL-NEXT: vpternlogq $15, %zmm4, %zmm4, %zmm4
-; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
+; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
+; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
+; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; KNL-NEXT: andl $127, %esi
; KNL-NEXT: testb %dil, %dil
-; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vmovdqa %ymm4, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vmovdqa %ymm3, (%rsp)
+; KNL-NEXT: vmovdqa64 %zmm1, {{[0-9]+}}(%rsp)
+; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index 5d9cd1643aec..f99fc38c6625 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -1314,18 +1314,18 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5
+; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm4
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
-; AVX512F-NEXT: vpshufb %ymm1, %ymm5, %ymm1
-; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm3
-; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
+; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpshufb %ymm0, %ymm5, %ymm0
-; AVX512F-NEXT: vpor %ymm0, %ymm3, %ymm0
+; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v64i8:
@@ -1591,19 +1591,19 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
-; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
-; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
+; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
-; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v32i16:
@@ -1887,19 +1887,19 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
-; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
-; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
+; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
-; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v16i32:
@@ -2191,19 +2191,19 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
-; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
-; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
+; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
-; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v8i64:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index 088a590a2e07..60406c45ba89 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -821,31 +821,30 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3
+; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm5
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm5, %zmm3
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm6
-; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
-; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm4
-; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
-; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
+; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
+; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
+; AVX512F-NEXT: vpandq %zmm4, %zmm3, %zmm3
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm6
; AVX512F-NEXT: vpsrlw %xmm4, %ymm6, %ymm6
-; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
-; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5
-; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
-; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1
-; AVX512F-NEXT: vporq %zmm1, %zmm3, %zmm1
-; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
-; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
+; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
+; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
+; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm4
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
+; AVX512F-NEXT: vpternlogq $226, %zmm4, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -854,31 +853,30 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3
+; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm5
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm5, %zmm3
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm6
-; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
-; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
+; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
+; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
+; AVX512VL-NEXT: vpandq %zmm4, %zmm3, %zmm3
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm6, %ymm6
-; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
-; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5
-; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
-; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1
-; AVX512VL-NEXT: vporq %zmm1, %zmm3, %zmm1
-; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
-; AVX512VL-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
+; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
+; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
+; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
+; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm4
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
+; AVX512VL-NEXT: vpternlogq $226, %zmm4, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -1510,40 +1508,28 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 2481b8ebfe25..6671f3ec4c0f 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -483,14 +483,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm1, %ymm1
-; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm4
-; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
+; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -529,14 +528,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm4
-; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
+; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -886,38 +884,26 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
-; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
+; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index fa70e840081f..9aa74f165bdd 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -805,68 +805,66 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: vpsrlw %xmm4, %ymm3, %ymm3
-; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm6
-; AVX512F-NEXT: vpsrlw $8, %xmm6, %xmm6
-; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
-; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm4
-; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
-; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
-; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4
+; AVX512F-NEXT: vpsubb %xmm3, %xmm4, %xmm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm6
-; AVX512F-NEXT: vpsllw %xmm4, %ymm6, %ymm6
-; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm5
-; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
-; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
+; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
+; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
+; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
+; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm0
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
+; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
+; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm6
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4
+; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
+; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; AVX512F-NEXT: vpternlogq $236, %zmm4, %zmm0, %zmm2
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0
+; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm4, %ymm3, %ymm3
-; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm6
-; AVX512VL-NEXT: vpsrlw $8, %xmm6, %xmm6
-; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
-; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm4
-; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
-; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4
+; AVX512VL-NEXT: vpsubb %xmm3, %xmm4, %xmm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm6
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm6, %ymm6
-; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm5
-; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
-; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
+; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm3
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
+; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
+; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
+; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4
+; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
+; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm6
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4
+; AVX512VL-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
+; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; AVX512VL-NEXT: vpternlogq $236, %zmm4, %zmm0, %zmm2
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0
+; AVX512VL-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -1494,40 +1492,28 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 04a883171a7c..0084702b7fd7 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -480,15 +480,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
-; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
+; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -524,15 +523,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
-; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
+; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512VL-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -882,38 +880,26 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
-; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
+; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
index 5b88eaec0596..e756f3ecc353 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -133,38 +133,36 @@ define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind {
define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: test_div7_64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
-; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3
-; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
-; AVX512F-NEXT: vpmullw %ymm4, %ymm5, %ymm5
-; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
-; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
-; AVX512F-NEXT: vpsubb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
-; AVX512F-NEXT: vpmullw %ymm4, %ymm6, %ymm6
-; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
-; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm2
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
+; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX512F-NEXT: vpackuswb %ymm6, %ymm2, %ymm2
-; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4
+; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
+; AVX512F-NEXT: vpackuswb %ymm2, %ymm4, %ymm2
+; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm4
+; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm4
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2
+; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4
+; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512F-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
+; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
-; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_div7_64i8:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 831c03f03825..690d9f721bb2 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -418,22 +418,21 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm4
+; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm5
+; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT: vpsllw %xmm2, %xmm5, %xmm6
-; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
-; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
-; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
-; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
+; AVX512F-NEXT: vpsllw %xmm2, %xmm5, %xmm2
+; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; AVX512F-NEXT: vpandq %zmm2, %zmm4, %zmm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
-; AVX512F-NEXT: vpsrlw %xmm1, %xmm5, %xmm4
-; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
+; AVX512F-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
+; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v64i8:
@@ -445,22 +444,21 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm4
+; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm5
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VL-NEXT: vpsllw %xmm2, %xmm5, %xmm6
-; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
-; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
-; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512VL-NEXT: vpand %ymm6, %ymm2, %ymm2
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
+; AVX512VL-NEXT: vpsllw %xmm2, %xmm5, %xmm2
+; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; AVX512VL-NEXT: vpandq %zmm2, %zmm4, %zmm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm4
-; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
+; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512VL-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v64i8:
@@ -809,38 +807,26 @@ define <32 x i16> @splatconstant_rotate_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: splatconstant_rotate_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v64i8:
@@ -947,40 +933,28 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
-; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm0
-; AVX512F-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
+; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
+; AVX512F-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
-; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm0
-; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
+; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index 7cdeb29d3584..c311f138e789 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -156,14 +156,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
-; AVX512DQ-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
-; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
@@ -308,13 +308,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
-; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm1
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index 91146cfdf1d4..1bf878739bf8 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -151,13 +151,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX512DQ-NEXT: vpsllw %xmm1, %xmm3, %xmm3
-; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
-; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
@@ -306,13 +306,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512DQ-NEXT: vpsllw $3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
-; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm1
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
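
For context on the check lines above: the splatvar_shift_v64i8 tests shift a <64 x i8> vector by an amount taken from the first element of a second vector. Because x86 has no byte-granularity vector shift, the v64i8 case is lowered to 16-bit vpsllw/vpsrlw plus a byte mask built by shifting an all-ones vector and broadcasting it; the updated checks show that mask now being applied once as a 512-bit vpandq (or folded into vpternlogq) instead of once per 256-bit half. The following is only a minimal sketch of the kind of IR behind those checks, assuming the usual splat-via-shufflevector pattern; the exact body in vector-shift-shl-512.ll may differ:

  ; Sketch only: broadcast lane 0 of %b to all 64 lanes, then shift %a left by it.
  define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
    %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
    %shift = shl <64 x i8> %a, %splat
    ret <64 x i8> %shift
  }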