[llvm] aeb3c77 - [X86] Add shift by splat modulo amount vector tests
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 16 12:46:37 PST 2021
Author: Simon Pilgrim
Date: 2021-11-16T20:46:17Z
New Revision: aeb3c772d316ed71836bccf515517e769150e6a1
URL: https://github.com/llvm/llvm-project/commit/aeb3c772d316ed71836bccf515517e769150e6a1
DIFF: https://github.com/llvm/llvm-project/commit/aeb3c772d316ed71836bccf515517e769150e6a1.diff
LOG: [X86] Add shift by splat modulo amount vector tests
Shows failure to fold zero_extend_vector_inreg(and(x, c)) -> bitcast(and(x,c')) when we're only demanding the 0'th extended element, such as with the SSE variable shift ops.
Added:
Modified:
llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
llvm/test/CodeGen/X86/vector-shift-shl-128.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
llvm/test/CodeGen/X86/vector-shift-shl-512.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 5021a25cb8a9..b151a7d56748 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -920,6 +920,359 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
ret <16 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE-LABEL: splatvar_modulo_shift_v2i64:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE-NEXT: psrlq %xmm1, %xmm2
+; SSE-NEXT: psrlq %xmm1, %xmm0
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: psubq %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatvar_modulo_shift_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X86-SSE-NEXT: psrlq %xmm1, %xmm2
+; X86-SSE-NEXT: psrlq %xmm1, %xmm0
+; X86-SSE-NEXT: pxor %xmm2, %xmm0
+; X86-SSE-NEXT: psubq %xmm2, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <2 x i64> %b, <i64 63, i64 63>
+ %splat = shufflevector <2 x i64> %mod, <2 x i64> undef, <2 x i32> zeroinitializer
+ %shift = ashr <2 x i64> %a, %splat
+ ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v4i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: psrad %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v4i32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: psrad %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movd %xmm1, %eax
+; X86-SSE-NEXT: andl $31, %eax
+; X86-SSE-NEXT: movd %eax, %xmm1
+; X86-SSE-NEXT: psrad %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <4 x i32> %mod, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shift = ashr <4 x i32> %a, %splat
+ ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v8i16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: psraw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v8i16:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT: psraw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: splatvar_modulo_shift_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_modulo_shift_v8i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT: psraw %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <8 x i16> %mod, <8 x i16> undef, <8 x i32> zeroinitializer
+ %shift = ashr <8 x i16> %a, %splat
+ ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v16i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: psrlw %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: psrlw %xmm1, %xmm2
+; SSE2-NEXT: psrlw $8, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; SSE2-NEXT: psrlw %xmm1, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubb %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v16i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: psrlw %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: psrlw %xmm1, %xmm2
+; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; SSE41-NEXT: psrlw %xmm1, %xmm2
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: psubb %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: splatvar_modulo_shift_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v16i8:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v16i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v16i8:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT: psrlw %xmm1, %xmm0
+; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT: psrlw %xmm1, %xmm2
+; X86-SSE-NEXT: psrlw $8, %xmm2
+; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; X86-SSE-NEXT: pand %xmm2, %xmm0
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-SSE-NEXT: psrlw %xmm1, %xmm2
+; X86-SSE-NEXT: pxor %xmm2, %xmm0
+; X86-SSE-NEXT: psubb %xmm2, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <16 x i8> %mod, <16 x i8> undef, <16 x i32> zeroinitializer
+ %shift = ashr <16 x i8> %a, %splat
+ ret <16 x i8> %shift
+}
+
;
; Constant Shifts
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 94b3b3a75a4c..a9bcc73f97fe 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1000,6 +1000,401 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
ret <32 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
+ %splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
+ %shift = ashr <4 x i64> %a, %splat
+ ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
+ %shift = ashr <8 x i32> %a, %splat
+ ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <16 x i16> %mod, <16 x i16> undef, <16 x i32> zeroinitializer
+ %shift = ashr <16 x i16> %a, %splat
+ ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; AVX1-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
+; AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
+; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v32i8:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v32i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vpshab %xmm1, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512DQ-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
+; AVX512DQ-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm3, %xmm1
+; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQVL-NEXT: vpternlogq $108, %ymm0, %ymm2, %ymm1
+; AVX512DQVL-NEXT: vpsubb %ymm2, %ymm1, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v32i8:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; X86-AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v32i8:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; X86-AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
+; X86-AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; X86-AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
+; X86-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <32 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <32 x i8> %mod, <32 x i8> undef, <32 x i32> zeroinitializer
+ %shift = ashr <32 x i8> %a, %splat
+ ret <32 x i8> %shift
+}
+
;
; Constant Shifts
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 6550968703ac..bc878940f883 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -226,6 +226,102 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
ret <64 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: splatvar_modulo_shift_v8i64:
+; ALL: # %bb.0:
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+ %splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = ashr <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: splatvar_modulo_shift_v16i32:
+; ALL: # %bb.0:
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = ashr <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = ashr <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
+; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpxor %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsubb %ymm4, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpxor %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsubb %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v64i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm2, %zmm2
+; AVX512BW-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
+; AVX512BW-NEXT: vpternlogq $108, %zmm0, %zmm2, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %mod = and <64 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <64 x i8> %mod, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = ashr <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index 1d39c167c280..76b32990a529 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -766,6 +766,312 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
ret <16 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE-LABEL: splatvar_modulo_shift_v2i64:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: psrlq %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatvar_modulo_shift_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_modulo_shift_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: psrlq %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <2 x i64> %b, <i64 63, i64 63>
+ %splat = shufflevector <2 x i64> %mod, <2 x i64> undef, <2 x i32> zeroinitializer
+ %shift = lshr <2 x i64> %a, %splat
+ ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v4i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: psrld %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v4i32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: psrld %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movd %xmm1, %eax
+; X86-SSE-NEXT: andl $31, %eax
+; X86-SSE-NEXT: movd %eax, %xmm1
+; X86-SSE-NEXT: psrld %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <4 x i32> %mod, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shift = lshr <4 x i32> %a, %splat
+ ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v8i16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: psrlw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v8i16:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT: psrlw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: splatvar_modulo_shift_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_modulo_shift_v8i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT: psrlw %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <8 x i16> %mod, <8 x i16> undef, <8 x i32> zeroinitializer
+ %shift = lshr <8 x i16> %a, %splat
+ ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v16i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: psrlw %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: psrlw %xmm1, %xmm2
+; SSE2-NEXT: psrlw $8, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v16i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: psrlw %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: psrlw %xmm1, %xmm2
+; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: splatvar_modulo_shift_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v16i8:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v16i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v16i8:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT: psrlw %xmm1, %xmm0
+; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT: psrlw %xmm1, %xmm2
+; X86-SSE-NEXT: psrlw $8, %xmm2
+; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT: pand %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <16 x i8> %mod, <16 x i8> undef, <16 x i32> zeroinitializer
+ %shift = lshr <16 x i8> %a, %splat
+ ret <16 x i8> %shift
+}
+
;
; Constant Shifts
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 163486ad4135..44256db3979d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -816,6 +816,345 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
ret <32 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
+ %splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
+ %shift = lshr <4 x i64> %a, %splat
+ ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
+ %shift = lshr <8 x i32> %a, %splat
+ ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <16 x i16> %mod, <16 x i16> undef, <16 x i32> zeroinitializer
+ %shift = lshr <16 x i16> %a, %splat
+ ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v32i8:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v32i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512DQ-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v32i8:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; X86-AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v32i8:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; X86-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
+; X86-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <32 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <32 x i8> %mod, <32 x i8> undef, <32 x i32> zeroinitializer
+ %shift = lshr <32 x i8> %a, %splat
+ ret <32 x i8> %shift
+}
+
;
; Constant Shifts
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index 1f5a20ddedb5..ee2a38089ed4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -181,6 +181,93 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
ret <64 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: splatvar_modulo_shift_v8i64:
+; ALL: # %bb.0:
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+ %splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = lshr <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: splatvar_modulo_shift_v16i32:
+; ALL: # %bb.0:
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = lshr <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = lshr <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v64i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
+; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %mod = and <64 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <64 x i8> %mod, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = lshr <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index 7626f2454a5f..c10c22b472d1 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -671,6 +671,308 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
ret <16 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE-LABEL: splatvar_modulo_shift_v2i64:
+; SSE: # %bb.0:
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: psllq %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatvar_modulo_shift_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_modulo_shift_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: psllq %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <2 x i64> %b, <i64 63, i64 63>
+ %splat = shufflevector <2 x i64> %mod, <2 x i64> undef, <2 x i32> zeroinitializer
+ %shift = shl <2 x i64> %a, %splat
+ ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v4i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: pslld %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v4i32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: pslld %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: splatvar_modulo_shift_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movd %xmm1, %eax
+; X86-SSE-NEXT: andl $31, %eax
+; X86-SSE-NEXT: movd %eax, %xmm1
+; X86-SSE-NEXT: pslld %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <4 x i32> %mod, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shift = shl <4 x i32> %a, %splat
+ ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v8i16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: psllw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v8i16:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT: psllw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: splatvar_modulo_shift_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_modulo_shift_v8i16:
+; XOP: # %bb.0:
+; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT: psllw %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <8 x i16> %mod, <8 x i16> undef, <8 x i32> zeroinitializer
+ %shift = shl <8 x i16> %a, %splat
+ ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE2-LABEL: splatvar_modulo_shift_v16i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: psllw %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: psllw %xmm1, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: splatvar_modulo_shift_v16i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: psllw %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: psllw %xmm1, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pshufb %xmm1, %xmm2
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: splatvar_modulo_shift_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v16i8:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v16i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+;
+; X86-SSE-LABEL: splatvar_modulo_shift_v16i8:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT: psllw %xmm1, %xmm0
+; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT: psllw %xmm1, %xmm2
+; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT: pand %xmm1, %xmm0
+; X86-SSE-NEXT: retl
+ %mod = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <16 x i8> %mod, <16 x i8> undef, <16 x i32> zeroinitializer
+ %shift = shl <16 x i8> %a, %splat
+ ret <16 x i8> %shift
+}
+
;
; Constant Shifts
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index d151177d96c7..fd0ee8f05154 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -741,6 +741,340 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
ret <32 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v4i64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63>
+ %splat = shufflevector <4 x i64> %mod, <4 x i64> undef, <4 x i32> zeroinitializer
+ %shift = shl <4 x i64> %a, %splat
+ ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v8i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
+ %shift = shl <8 x i32> %a, %splat
+ ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <16 x i16> %mod, <16 x i16> undef, <16 x i32> zeroinitializer
+ %shift = shl <16 x i16> %a, %splat
+ ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
+; AVX1-LABEL: splatvar_modulo_shift_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatvar_modulo_shift_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_modulo_shift_v32i8:
+; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_modulo_shift_v32i8:
+; XOPAVX2: # %bb.0:
+; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512DQ-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQVL-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: retq
+;
+; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT: retq
+;
+; X86-AVX1-LABEL: splatvar_modulo_shift_v32i8:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; X86-AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: splatvar_modulo_shift_v32i8:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; X86-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
+; X86-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: retl
+ %mod = and <32 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <32 x i8> %mod, <32 x i8> undef, <32 x i32> zeroinitializer
+ %shift = shl <32 x i8> %a, %splat
+ ret <32 x i8> %shift
+}
+
;
; Constant Shifts
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index 45c653c3e78b..655bd6319a6a 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -174,6 +174,91 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
ret <64 x i8> %shift
}
+;
+; Uniform Variable Modulo Shifts
+;
+
+define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: splatvar_modulo_shift_v8i64:
+; ALL: # %bb.0:
+; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %mod = and <8 x i64> %b, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+ %splat = shufflevector <8 x i64> %mod, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = shl <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: splatvar_modulo_shift_v16i32:
+; ALL: # %bb.0:
+; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
+; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %splat = shufflevector <16 x i32> %mod, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = shl <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %splat = shufflevector <32 x i16> %mod, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = shl <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_modulo_shift_v64i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
+; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %mod = and <64 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %splat = shufflevector <64 x i8> %mod, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = shl <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
;
; Constant Shifts
;
More information about the llvm-commits
mailing list