[llvm] a636801 - [X86] LowerRotate - enable v8i16 ROTL/ROTR on all pre-SSE41 targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 24 06:14:42 PST 2022
Author: Simon Pilgrim
Date: 2022-02-24T14:14:08Z
New Revision: a636801a3671ae277b439b10b3fee00ce6717055
URL: https://github.com/llvm/llvm-project/commit/a636801a3671ae277b439b10b3fee00ce6717055
DIFF: https://github.com/llvm/llvm-project/commit/a636801a3671ae277b439b10b3fee00ce6717055.diff
LOG: [X86] LowerRotate - enable v8i16 ROTL/ROTR on all pre-SSE41 targets
We're still better off expanding this once we have PMOVZX
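Put concretely: on SSE41+ targets the splat-amount rotate keeps being expanded into two real shifts, since PMOVZX makes zero-extending the shift amount cheap; pre-SSE41 targets now take the unpack-based lowering instead. A minimal scalar sketch of the two-shift form (my own helper name, assuming 32-bit unsigned; this is not code from the patch):

  #include <cstdint>

  static inline uint16_t rotl16_two_shifts(uint16_t x, unsigned amt) {
    amt &= 15;                                // matches the PAND against the rotate mask
    unsigned hi = (unsigned)x << amt;         // PSLLW with the PMOVZX-widened amount
    unsigned lo = (unsigned)x >> (16 - amt);  // PSRLW; a count of 16 yields 0, as in hardware
    return (uint16_t)(hi | lo);
  }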
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7d6d0476f84d..edf4825cd069 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30199,7 +30199,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
// TODO: Handle vXi16 cases on all targets.
if (EltSizeInBits == 8 || EltSizeInBits == 32 ||
- (IsROTL && EltSizeInBits == 16 && !Subtarget.hasAVX())) {
+ (EltSizeInBits == 16 && !Subtarget.hasSSE41())) {
int BaseRotAmtIdx = -1;
if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) {
unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
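The unpack lowering named in the comment above widens each 16-bit lane to 32 bits alongside a second copy of itself, so a single wide shift performs the whole rotate. A scalar sketch of the idea (again a hedged model of my own, not the DAG code itself):

  #include <cstdint>

  static inline uint16_t rotr16_unpack(uint16_t x, unsigned y) {
    y &= 15;                                  // y & (bw-1)
    uint32_t wide = ((uint32_t)x << 16) | x;  // unpack(x,x): two copies per dword
    return (uint16_t)(wide >> y);             // low half is rotr(x,y)
  }

For ROTL the same widened value is shifted left and the high half is kept instead, i.e. (uint16_t)((wide << y) >> 16), matching the rotl line of the comment.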
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index 16013c3cd221..10d57d83321d 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -935,14 +935,14 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE41-NEXT: pslld %xmm1, %xmm2
-; SSE41-NEXT: psrld $16, %xmm2
-; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE41-NEXT: pslld %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: psllw %xmm2, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
+; SSE41-NEXT: psubw %xmm1, %xmm2
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; SSE41-NEXT: psrlw %xmm1, %xmm0
+; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index f3bb1c3cef0d..f67377ddd043 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -952,16 +952,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
; SSE2-LABEL: splatvar_funnnel_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: psrlw %xmm2, %xmm3
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
-; SSE2-NEXT: psubw %xmm1, %xmm2
-; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
-; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE2-NEXT: psllw %xmm2, %xmm0
-; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: psrld %xmm1, %xmm2
+; SSE2-NEXT: pslld $16, %xmm2
+; SSE2-NEXT: psrad $16, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: psrld %xmm1, %xmm0
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
@@ -1072,16 +1072,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
-; X86-SSE2-NEXT: pand %xmm1, %xmm2
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X86-SSE2-NEXT: psrlw %xmm2, %xmm3
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
-; X86-SSE2-NEXT: psubw %xmm1, %xmm2
-; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
-; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X86-SSE2-NEXT: psllw %xmm2, %xmm0
-; X86-SSE2-NEXT: por %xmm3, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X86-SSE2-NEXT: psrld %xmm1, %xmm2
+; X86-SSE2-NEXT: pslld $16, %xmm2
+; X86-SSE2-NEXT: psrad $16, %xmm2
+; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X86-SSE2-NEXT: psrld %xmm1, %xmm0
+; X86-SSE2-NEXT: pslld $16, %xmm0
+; X86-SSE2-NEXT: psrad $16, %xmm0
+; X86-SSE2-NEXT: packssdw %xmm2, %xmm0
; X86-SSE2-NEXT: retl
%splat = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> zeroinitializer
%res = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> %splat)
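As the IR above shows, @llvm.fshr with both value operands equal to %x is exactly a rotate-right by the splatted amount, which is why these fshr tests pick up the same rotate lowering. In scalar terms (a sketch under the same caveats as above):

  // fshr(x, x, y) on i16 == rotr(x, y)
  static inline uint16_t rotr16(uint16_t x, unsigned y) {
    y &= 15;
    return (uint16_t)(((unsigned)x >> y) | ((unsigned)x << ((16 - y) & 15)));
  }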
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 16cce0adfcb2..8b503f70c302 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -920,14 +920,14 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE41-LABEL: splatvar_rotate_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE41-NEXT: pslld %xmm1, %xmm2
-; SSE41-NEXT: psrld $16, %xmm2
-; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE41-NEXT: pslld %xmm1, %xmm0
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: psllw %xmm2, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
+; SSE41-NEXT: psubw %xmm1, %xmm2
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; SSE41-NEXT: psrlw %xmm1, %xmm0
+; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_rotate_v8i16:
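The splatvar_rotate_v8i16 change mirrors the fshl case earlier in this mail: with SSE41's pmovzxwq available to zero-extend the splatted amount, the plain psllw/psrlw expansion is preferred over the punpcklwd/punpckhwd form, exactly as the updated LowerRotate condition selects.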