[llvm-branch-commits] [llvm] d073805 - [X86] LowerRotate - VBMI2 targets can lower vXi16 rotates using funnel shifts.
Simon Pilgrim via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 4 03:34:29 PST 2020
Author: Simon Pilgrim
Date: 2020-12-04T11:29:23Z
New Revision: d073805be644d86f1bf885ada3d8e7548226ca6e
URL: https://github.com/llvm/llvm-project/commit/d073805be644d86f1bf885ada3d8e7548226ca6e
DIFF: https://github.com/llvm/llvm-project/commit/d073805be644d86f1bf885ada3d8e7548226ca6e.diff
LOG: [X86] LowerRotate - VBMI2 targets can lower vXi16 rotates using funnel shifts.
Ideally we'd do this inside DAGCombine, but until we can make the FSHL/FSHR opcodes legal for VBMI2 it won't help us.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
llvm/test/CodeGen/X86/vector-rotate-256.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9bfd7ca80701..04075e924eab 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28217,6 +28217,12 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
+ // AVX512 VBMI2 vXi16 - lower to funnel shifts.
+ if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
+ unsigned FunnelOpc = (Opcode == ISD::ROTL ? ISD::FSHL : ISD::FSHR);
+ return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
+ }
+
assert((Opcode == ISD::ROTL) && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index e48a9321a8fa..0aed42319252 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -439,24 +439,16 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
;
; AVX512VBMI2-LABEL: var_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllvw %xmm1, %xmm0, %xmm2
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLVBMI2-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: var_funnnel_v8i16:
@@ -1033,17 +1025,68 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatvar_funnnel_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatvar_funnnel_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_funnnel_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_funnnel_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
; XOPAVX1: # %bb.0:
@@ -1618,19 +1661,15 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x) nounwind {
; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,15,14,13,12,11,10,9]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: constant_funnnel_v8i16:
@@ -1995,12 +2034,46 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $9, %xmm0, %xmm1
-; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $9, %xmm0, %xmm1
+; AVX512F-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $9, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $9, %xmm0, %xmm1
+; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $9, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldw $7, %xmm0, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v8i16:
; XOP: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index eb07daa0801c..e40268a69b8a 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -344,23 +344,15 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
;
; AVX512VBMI2-LABEL: var_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllvw %ymm1, %ymm0, %ymm2
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1
-; AVX512VLVBMI2-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v16i16:
@@ -821,18 +813,71 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_funnnel_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatvar_funnnel_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_funnnel_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_funnnel_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -1280,18 +1325,14 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind {
; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v16i16:
@@ -1660,12 +1701,45 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $9, %ymm0, %ymm1
-; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $9, %ymm0, %ymm1
+; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $9, %ymm0, %ymm1
+; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $9, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm0, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
; XOPAVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index 8da9d47f9b06..c2ea45d6013c 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -472,12 +472,8 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
@@ -485,12 +481,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllvw %xmm1, %xmm0, %xmm2
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLVBMI2-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: var_funnnel_v8i16:
@@ -1099,19 +1090,80 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatvar_funnnel_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatvar_funnnel_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_funnnel_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_funnnel_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
; XOPAVX1: # %bb.0:
@@ -1702,19 +1754,15 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x) nounwind {
; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,1,2,3,4,5,6,7]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,15,14,13,12,11,10,9]
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,65535,65534,65533,65532,65531,65530,65529]
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: constant_funnnel_v8i16:
@@ -2079,12 +2127,46 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $7, %xmm0, %xmm1
-; AVX512-NEXT: vpsllw $9, %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $7, %xmm0, %xmm1
+; AVX512F-NEXT: vpsllw $9, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $7, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsllw $9, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $7, %xmm0, %xmm1
+; AVX512BW-NEXT: vpsllw $9, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $7, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsllw $9, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $9, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldw $9, %xmm0, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v8i16:
; XOP: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index 856ac9468e14..ad6214413f66 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -374,24 +374,15 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm2, %ymm1
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm2, %ymm1
-; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllvw %ymm1, %ymm0, %ymm2
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1
-; AVX512VLVBMI2-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v16i16:
@@ -887,20 +878,83 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_funnnel_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatvar_funnnel_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_funnnel_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_funnnel_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
@@ -1362,18 +1416,14 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind {
; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521]
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v16i16:
@@ -1742,12 +1792,45 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_funnnel_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $7, %ymm0, %ymm1
-; AVX512-NEXT: vpsllw $9, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_funnnel_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm1
+; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $7, %ymm0, %ymm1
+; AVX512BW-NEXT: vpsllw $9, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $7, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsllw $9, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $9, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldw $9, %ymm0, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
; XOPAVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index fced1af3b793..a00df716657b 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -425,24 +425,16 @@ define <8 x i16> @var_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512VBMI2-LABEL: var_rotate_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_rotate_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllvw %xmm1, %xmm0, %xmm2
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLVBMI2-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: var_rotate_v8i16:
@@ -1000,17 +992,68 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatvar_rotate_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatvar_rotate_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_rotate_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_rotate_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatvar_rotate_v8i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatvar_rotate_v8i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_rotate_v8i16:
; XOPAVX1: # %bb.0:
@@ -1570,18 +1613,14 @@ define <8 x i16> @constant_rotate_v8i16(<8 x i16> %a) nounwind {
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [16,15,14,13,12,11,10,9]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_rotate_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: constant_rotate_v8i16:
@@ -1952,12 +1991,46 @@ define <8 x i16> @splatconstant_rotate_v8i16(<8 x i16> %a) nounwind {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: splatconstant_rotate_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $9, %xmm0, %xmm1
-; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_rotate_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $9, %xmm0, %xmm1
+; AVX512F-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_rotate_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $9, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_rotate_v8i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $9, %xmm0, %xmm1
+; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_rotate_v8i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $9, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsllw $7, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_rotate_v8i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_rotate_v8i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldw $7, %xmm0, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_rotate_v8i16:
; XOP: # %bb.0:
@@ -2275,17 +2348,16 @@ define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind {
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $11, %xmm0, %xmm1
-; AVX512VBMI2-NEXT: vpsllw $5, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsllw $5, %xmm0, %xmm1
-; AVX512VLVBMI2-NEXT: vpsrlw $11, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $168, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpshldw $5, %xmm0, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_rotate_mask_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index d5b747ec2100..cb358ccc1b04 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -337,23 +337,15 @@ define <16 x i16> @var_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512VBMI2-LABEL: var_rotate_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_rotate_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllvw %ymm1, %ymm0, %ymm2
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1
-; AVX512VLVBMI2-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_rotate_v16i16:
@@ -802,18 +794,71 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatvar_rotate_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatvar_rotate_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatvar_rotate_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_rotate_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatvar_rotate_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatvar_rotate_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
+; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_rotate_v16i16:
; XOPAVX1: # %bb.0:
@@ -1260,17 +1305,13 @@ define <16 x i16> @constant_rotate_v16i16(<16 x i16> %a) nounwind {
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_rotate_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_rotate_v16i16:
@@ -1647,12 +1688,45 @@ define <16 x i16> @splatconstant_rotate_v16i16(<16 x i16> %a) nounwind {
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: splatconstant_rotate_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsrlw $9, %ymm0, %ymm1
-; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: splatconstant_rotate_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrlw $9, %ymm0, %ymm1
+; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_rotate_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_rotate_v16i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrlw $9, %ymm0, %ymm1
+; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_rotate_v16i16:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrlw $9, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: retq
+;
+; AVX512VBMI2-LABEL: splatconstant_rotate_v16i16:
+; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512VBMI2-NEXT: retq
+;
+; AVX512VLVBMI2-LABEL: splatconstant_rotate_v16i16:
+; AVX512VLVBMI2: # %bb.0:
+; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm0, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_v16i16:
; XOPAVX1: # %bb.0:
@@ -1995,17 +2069,15 @@ define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind {
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v16i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $11, %ymm0, %ymm1
-; AVX512VBMI2-NEXT: vpsllw $5, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsllw $5, %ymm0, %ymm1
-; AVX512VLVBMI2-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpternlogq $168, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpshldw $5, %ymm0, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_mask_v16i16:
More information about the llvm-branch-commits
mailing list