[llvm] 159da56 - [X86] Enable v32i16 ISD::ROTL/ROTR lowering on AVX512BW targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 24 05:30:56 PST 2021
Author: Simon Pilgrim
Date: 2021-12-24T13:30:52Z
New Revision: 159da567378ab5a4bf9b62162d16caccf3db16f9
URL: https://github.com/llvm/llvm-project/commit/159da567378ab5a4bf9b62162d16caccf3db16f9
DIFF: https://github.com/llvm/llvm-project/commit/159da567378ab5a4bf9b62162d16caccf3db16f9.diff
LOG: [X86] Enable v32i16 ISD::ROTL/ROTR lowering on AVX512BW targets
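
As a reference point, here is a minimal sketch of the kind of IR these changes affect, adapted from the var_funnnel_v32i16 pattern already present in the modified tests (the function name rotl_v32i16 below is illustrative, not taken from the tests). A v32i16 rotate-left is expressed as a funnel shift with both value operands equal; with this commit, AVX512BW targets lower it through the custom ROTL/ROTR path visible in the test diffs (mask the amount once, vpsllvw by it, vpsrlvw by 16 minus it, then vporq) rather than the previous expansion that negated and re-masked the amount.

define <32 x i16> @rotl_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
  ; rotate-left == funnel shift left with both value operands equal to %x
  %res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %amt)
  ret <32 x i16> %res
}
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
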
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-rotate-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 34ea8901fb3ae..6f6361b6757b7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1654,6 +1654,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
@@ -1668,21 +1670,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
- // With BWI, expanding (and promoting the shifts) is the better.
- if (!Subtarget.useBWIRegs()) {
- setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
- setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
- }
-
- setOperationAction(ISD::ROTL, MVT::v64i8, Custom);
- setOperationAction(ISD::ROTR, MVT::v64i8, Custom);
-
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
@@ -29894,12 +29885,12 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is512BitVector() && !Subtarget.useBWIRegs())
return splitVectorIntBinary(Op, DAG);
- assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
- ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
- Subtarget.hasAVX2()) ||
- (VT == MVT::v32i16 && !Subtarget.useBWIRegs()) ||
- (VT == MVT::v64i8 && Subtarget.useBWIRegs())) &&
- "Only vXi32/vXi16/vXi8 vector rotates supported");
+ assert(
+ (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
+ ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
+ Subtarget.hasAVX2()) ||
+ ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.useBWIRegs())) &&
+ "Only vXi32/vXi16/vXi8 vector rotates supported");
MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2);
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index df579191c87fa..0e8cceb4db3fb 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -90,26 +90,22 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
-; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512VLBW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
-; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v32i16:
@@ -334,30 +330,26 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3
-; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index bd458426f1145..d7ace82e7f08f 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -90,26 +90,22 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
;
; AVX512BW-LABEL: var_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
-; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
-; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v32i16:
@@ -336,30 +332,26 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
-; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
@@ -536,15 +528,15 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
;
; AVX512BW-LABEL: constant_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
-; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
@@ -706,15 +698,15 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index a54988c0870da..4427d3b2c79fe 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -91,20 +91,22 @@ define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
;
; AVX512BW-LABEL: var_rotate_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_rotate_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm2, %zmm2
-; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1
+; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_rotate_v32i16:
@@ -341,22 +343,24 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq