[llvm-branch-commits] [llvm] b96a521 - [X86] LowerRotate - enable custom lowering of ROTL/ROTR vXi16 on VBMI2 targets.
Simon Pilgrim via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 4 04:51:22 PST 2020
Author: Simon Pilgrim
Date: 2020-12-04T12:16:59Z
New Revision: b96a5210774e653c65b04ace275e08e0af6ae31b
URL: https://github.com/llvm/llvm-project/commit/b96a5210774e653c65b04ace275e08e0af6ae31b
DIFF: https://github.com/llvm/llvm-project/commit/b96a5210774e653c65b04ace275e08e0af6ae31b.diff
LOG: [X86] LowerRotate - enable custom lowering of ROTL/ROTR vXi16 on VBMI2 targets.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 04075e924eab..bfd80690347d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1715,6 +1715,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
+
+ setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
+ setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
+ setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
+ setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
}
}// useAVX512Regs
@@ -27640,6 +27645,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
MVT VT = Amt.getSimpleValueType();
if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget.hasInt256() && VT == MVT::v16i16) ||
+ (Subtarget.hasVBMI2() && VT == MVT::v32i16) ||
(!Subtarget.hasAVX512() && VT == MVT::v16i8)))
return SDValue();
@@ -28249,7 +28255,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return splitVectorIntBinary(Op, DAG);
assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
- ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
+ ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8 ||
+ VT == MVT::v32i16) &&
Subtarget.hasAVX2())) &&
"Only vXi32/vXi16/vXi8 vector rotates supported");
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 069bf361bdd6..f1e2f0d7f9ca 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -122,26 +122,12 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
;
; AVX512VBMI2-LABEL: var_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
-; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VBMI2-NEXT: vpsubw %zmm1, %zmm4, %zmm1
-; AVX512VBMI2-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLVBMI2-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512VLVBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm3
-; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VLVBMI2-NEXT: vpsubw %zmm1, %zmm4, %zmm1
-; AVX512VLVBMI2-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VLVBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %amt)
ret <32 x i16> %res
@@ -546,32 +532,14 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm3
-; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
-; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLVBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
-; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm3
-; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
%res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %splat)
@@ -836,16 +804,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
-; AVX512VBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshldvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
-; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
ret <32 x i16> %res
@@ -1124,16 +1088,12 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $9, %zmm0, %zmm1
-; AVX512VBMI2-NEXT: vpsllw $7, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $9, %zmm0, %zmm1
-; AVX512VLVBMI2-NEXT: vpsllw $7, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
ret <32 x i16> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index c2ea45d6013c..69197109edcf 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -469,19 +469,16 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
;
; AVX512VBMI2-LABEL: var_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpshrdvw %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: var_funnnel_v8i16:
@@ -1149,20 +1146,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpshrdvw %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
@@ -1754,15 +1747,15 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x) nounwind {
; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,65535,65534,65533,65532,65531,65530,65529]
-; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
+; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpshrdvw {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: constant_funnnel_v8i16:
@@ -2158,14 +2151,14 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind {
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vpshldw $9, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpshldw $9, %xmm0, %xmm0, %xmm0
+; AVX512VLVBMI2-NEXT: vpshrdw $7, %xmm0, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index ad6214413f66..8ba22ffd37fe 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -371,18 +371,15 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
;
; AVX512VBMI2-LABEL: var_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm2, %ymm1
-; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm2, %ymm1
-; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpshrdvw %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: var_funnnel_v16i16:
@@ -941,19 +938,15 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpshrdvw %ymm1, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
@@ -1416,14 +1409,14 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind {
; AVX512VBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521]
-; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpshrdvw {{.*}}(%rip), %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: constant_funnnel_v16i16:
@@ -1823,13 +1816,13 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind {
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vpshldw $9, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpshldw $9, %ymm0, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpshrdw $7, %ymm0, %ymm0, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 9e4b46001049..574221637686 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -122,26 +122,12 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
;
; AVX512VBMI2-LABEL: var_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512VBMI2-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
-; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VBMI2-NEXT: vpsubw %zmm1, %zmm4, %zmm1
-; AVX512VBMI2-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512VLVBMI2-NEXT: vpandq %zmm2, %zmm1, %zmm3
-; AVX512VLVBMI2-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3
-; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VLVBMI2-NEXT: vpsubw %zmm1, %zmm4, %zmm1
-; AVX512VLVBMI2-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VLVBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %amt)
ret <32 x i16> %res
@@ -546,32 +532,14 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
-; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
-; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
-; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLVBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
-; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
-; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm4, %xmm1
-; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %zmm1
+; AVX512VLVBMI2-NEXT: vpshrdvw %zmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
%res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %splat)
@@ -832,16 +800,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
;
; AVX512VBMI2-LABEL: constant_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm1
-; AVX512VBMI2-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshrdvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm1
-; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpshrdvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
ret <32 x i16> %res
@@ -1120,16 +1084,12 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsllw $9, %zmm0, %zmm1
-; AVX512VBMI2-NEXT: vpsrlw $7, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i16:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsllw $9, %zmm0, %zmm1
-; AVX512VLVBMI2-NEXT: vpsrlw $7, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpshrdw $7, %zmm0, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
ret <32 x i16> %res
More information about the llvm-branch-commits mailing list