[llvm-branch-commits] [llvm] df1ddc4 - [X86] Let VBMI2 non-VLX targets still use funnel shift instructions
Simon Pilgrim via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 4 03:11:57 PST 2020
Author: Simon Pilgrim
Date: 2020-12-04T11:06:43Z
New Revision: df1ddc42345356f575088c4c80f9cf54d1461e6f
URL: https://github.com/llvm/llvm-project/commit/df1ddc42345356f575088c4c80f9cf54d1461e6f
DIFF: https://github.com/llvm/llvm-project/commit/df1ddc42345356f575088c4c80f9cf54d1461e6f.diff
LOG: [X86] Let VBMI2 non-VLX targets still use funnel shift instructions
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-fshl-128.ll
llvm/test/CodeGen/X86/vector-fshl-256.ll
llvm/test/CodeGen/X86/vector-fshr-128.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 13a2d8bf7735..9bfd7ca80701 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1709,7 +1709,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasVBMI2()) {
- for (auto VT : { MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
+ for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
+ MVT::v16i16, MVT::v8i32, MVT::v4i64,
+ MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
@@ -1879,15 +1881,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
- if (Subtarget.hasVBMI2()) {
- // TODO: Make these legal even without VLX?
- for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
- }
- }
-
setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
@@ -19453,15 +19446,29 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
if (IsFSHR)
std::swap(Op0, Op1);
+ // With AVX512, but not VLX we need to widen to get a 512-bit result type.
+ if (!Subtarget.hasVLX() && !VT.is512BitVector()) {
+ Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
+ Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512);
+ }
+
+ SDValue Funnel;
APInt APIntShiftAmt;
+ MVT ResultVT = Op0.getSimpleValueType();
if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
- return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0,
- Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
- }
-
- return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
- Op0, Op1, Amt);
+ Funnel =
+ DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0,
+ Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
+ } else {
+ if (!Subtarget.hasVLX() && !VT.is512BitVector())
+ Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512);
+ Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL,
+ ResultVT, Op0, Op1, Amt);
+ }
+ if (!Subtarget.hasVLX() && !VT.is512BitVector())
+ Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits());
+ return Funnel;
}
assert(
(VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index d760ebdd1aa1..9b34604eb5a4 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -128,13 +128,12 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i64:
@@ -336,13 +335,12 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i32:
@@ -633,14 +631,11 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
@@ -1177,14 +1172,12 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [63,63]
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2
-; AVX512VBMI2-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
@@ -1389,15 +1382,12 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3
-; AVX512VBMI2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvd %xmm3, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX512VBMI2-NEXT: vpslld %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
@@ -1657,15 +1647,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3
-; AVX512VBMI2-NEXT: vpsrlvw %zmm3, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
@@ -2468,9 +2454,12 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,14]
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i64:
@@ -2606,9 +2595,12 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i32:
@@ -2710,13 +2702,11 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8]
-; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
@@ -3005,9 +2995,11 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlq $50, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllq $14, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldq $14, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i64:
@@ -3079,9 +3071,11 @@ define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrld $28, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpslld $4, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldd $4, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i32:
@@ -3151,9 +3145,11 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $9, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 3ca4333cbc0b..d673bde95484 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -98,13 +98,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i64:
@@ -255,13 +253,11 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrld $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v8i32:
@@ -457,14 +453,11 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
;
; AVX512VBMI2-LABEL: var_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
@@ -901,14 +894,11 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm3
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllq %xmm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
-; AVX512VBMI2-NEXT: vpandn %ymm2, %ymm3, %ymm2
-; AVX512VBMI2-NEXT: vpsrlq $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvq %ymm2, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
@@ -1064,15 +1054,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrld $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX512VBMI2-NEXT: vpslld %xmm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
@@ -1249,14 +1235,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpandn {{.*}}(%rip), %ymm2, %ymm3
-; AVX512VBMI2-NEXT: vpsrlvw %zmm3, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
@@ -1939,9 +1922,11 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,14,50,60]
+; AVX512VBMI2-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i64:
@@ -2035,9 +2020,11 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,8,9,10,11]
+; AVX512VBMI2-NEXT: vpshldvd %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v8i32:
@@ -2135,13 +2122,11 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
;
; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm2, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v16i16:
@@ -2449,9 +2434,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlq $50, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllq $14, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldq $14, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
@@ -2533,9 +2519,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrld $28, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpslld $4, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldd $4, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
@@ -2617,9 +2604,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $9, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 7fc12f3094c3..bee498617069 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -128,13 +128,12 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63]
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllq $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i64:
@@ -337,13 +336,12 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpslld $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i32:
@@ -633,14 +631,11 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
@@ -1164,14 +1159,12 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [63,63]
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2
-; AVX512VBMI2-NEXT: vpsllq $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
@@ -1351,15 +1344,12 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [31,31,31,31]
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX512VBMI2-NEXT: vpsrld %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm2
-; AVX512VBMI2-NEXT: vpslld $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
@@ -1583,15 +1573,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm3
-; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandn %xmm4, %xmm3, %xmm3
-; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsrlw %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %xmm2
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
@@ -2084,9 +2070,12 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,14]
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i64:
@@ -2223,9 +2212,12 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i32:
@@ -2347,12 +2339,10 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8]
-; AVX512VBMI2-NEXT: vpsllw $1, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
@@ -2635,9 +2625,11 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlq $14, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllq $50, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i64:
@@ -2709,9 +2701,11 @@ define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrld $4, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpslld $28, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i32:
@@ -2781,9 +2775,11 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $7, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllw $9, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 942a318ba238..a1ab521d8429 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -98,13 +98,11 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63]
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllq $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v4i64:
@@ -256,13 +254,11 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt)
;
; AVX512VBMI2-LABEL: var_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpslld $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v8i32:
@@ -459,14 +455,11 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
;
; AVX512VBMI2-LABEL: var_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpand %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v16i16:
@@ -890,14 +883,11 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm3
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsrlq %xmm2, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [63,63,63,63]
-; AVX512VBMI2-NEXT: vpandn %ymm2, %ymm3, %ymm2
-; AVX512VBMI2-NEXT: vpsllq $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
@@ -1037,15 +1027,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
-; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31]
-; AVX512VBMI2-NEXT: vpandn %ymm3, %ymm2, %ymm4
-; AVX512VBMI2-NEXT: vpslld $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvd %ymm4, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX512VBMI2-NEXT: vpsrld %xmm2, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
@@ -1213,14 +1199,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpandn {{.*}}(%rip), %ymm2, %ymm3
-; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
@@ -1586,9 +1569,11 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,14,50,60]
+; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v4i64:
@@ -1683,9 +1668,11 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [4,5,6,7,8,9,10,11]
+; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v8i32:
@@ -1785,12 +1772,10 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; AVX512VBMI2-NEXT: vpsllw $1, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1
+; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v16i16:
@@ -2087,9 +2072,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlq $14, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllq $50, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
@@ -2171,9 +2157,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrld $4, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpslld $28, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
@@ -2255,9 +2242,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $7, %ymm1, %ymm1
-; AVX512VBMI2-NEXT: vpsllw $9, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
More information about the llvm-branch-commits
mailing list