[llvm] [X86] Use GFNI for vXi8 shifts/rotates (PR #89115)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 09:06:32 PDT 2024
================
@@ -262,84 +208,51 @@ declare <64 x i8> @llvm.fshl.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)
define <64 x i8> @splatconstant_fshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; GFNISSE-LABEL: splatconstant_fshr_v64i8:
; GFNISSE: # %bb.0:
-; GFNISSE-NEXT: psrlw $2, %xmm4
-; GFNISSE-NEXT: movdqa {{.*#+}} xmm8 = [192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192]
-; GFNISSE-NEXT: movdqa %xmm8, %xmm9
-; GFNISSE-NEXT: pandn %xmm4, %xmm9
-; GFNISSE-NEXT: psllw $6, %xmm0
-; GFNISSE-NEXT: pand %xmm8, %xmm0
-; GFNISSE-NEXT: por %xmm9, %xmm0
-; GFNISSE-NEXT: psrlw $2, %xmm5
-; GFNISSE-NEXT: movdqa %xmm8, %xmm4
-; GFNISSE-NEXT: pandn %xmm5, %xmm4
-; GFNISSE-NEXT: psllw $6, %xmm1
-; GFNISSE-NEXT: pand %xmm8, %xmm1
-; GFNISSE-NEXT: por %xmm4, %xmm1
-; GFNISSE-NEXT: psrlw $2, %xmm6
-; GFNISSE-NEXT: movdqa %xmm8, %xmm4
-; GFNISSE-NEXT: pandn %xmm6, %xmm4
-; GFNISSE-NEXT: psllw $6, %xmm2
-; GFNISSE-NEXT: pand %xmm8, %xmm2
-; GFNISSE-NEXT: por %xmm4, %xmm2
-; GFNISSE-NEXT: psrlw $2, %xmm7
-; GFNISSE-NEXT: psllw $6, %xmm3
-; GFNISSE-NEXT: pand %xmm8, %xmm3
-; GFNISSE-NEXT: pandn %xmm7, %xmm8
-; GFNISSE-NEXT: por %xmm8, %xmm3
+; GFNISSE-NEXT: movdqa {{.*#+}} xmm8 = [290499906672525312,290499906672525312]
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm4
+; GFNISSE-NEXT: pmovsxwq {{.*#+}} xmm9 = [258,258]
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm9, %xmm0
+; GFNISSE-NEXT: por %xmm4, %xmm0
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm5
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm9, %xmm1
+; GFNISSE-NEXT: por %xmm5, %xmm1
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm6
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm9, %xmm2
+; GFNISSE-NEXT: por %xmm6, %xmm2
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm8, %xmm7
+; GFNISSE-NEXT: gf2p8affineqb $0, %xmm9, %xmm3
+; GFNISSE-NEXT: por %xmm7, %xmm3
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: splatconstant_fshr_v64i8:
; GFNIAVX1: # %bb.0:
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; GFNIAVX1-NEXT: vpsrlw $2, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; GFNIAVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpand %xmm5, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; GFNIAVX1-NEXT: vpsllw $6, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192]
-; GFNIAVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
-; GFNIAVX1-NEXT: vpsllw $6, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm4 = [0,0,128,64,32,16,8,4,0,0,128,64,32,16,8,4,0,0,128,64,32,16,8,4,0,0,128,64,32,16,8,4]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm4, %ymm2, %ymm2
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm5 = [2,1,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,1,0,0,0,0,0,0]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm5, %ymm0, %ymm0
; GFNIAVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
-; GFNIAVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpand %xmm5, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vpsrlw $2, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; GFNIAVX1-NEXT: vpsllw $6, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
-; GFNIAVX1-NEXT: vpsllw $6, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm4, %ymm3, %ymm2
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm5, %ymm1, %ymm1
; GFNIAVX1-NEXT: vorps %ymm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: splatconstant_fshr_v64i8:
; GFNIAVX2: # %bb.0:
-; GFNIAVX2-NEXT: vpsrlw $2, %ymm2, %ymm2
-; GFNIAVX2-NEXT: vpbroadcastb {{.*#+}} ymm4 = [192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192]
-; GFNIAVX2-NEXT: vpandn %ymm2, %ymm4, %ymm2
-; GFNIAVX2-NEXT: vpsllw $6, %ymm0, %ymm0
-; GFNIAVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [290499906672525312,290499906672525312,290499906672525312,290499906672525312]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm4, %ymm2, %ymm2
+; GFNIAVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [258,258,258,258]
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm5, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
-; GFNIAVX2-NEXT: vpsrlw $2, %ymm3, %ymm2
-; GFNIAVX2-NEXT: vpandn %ymm2, %ymm4, %ymm2
-; GFNIAVX2-NEXT: vpsllw $6, %ymm1, %ymm1
-; GFNIAVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm4, %ymm3, %ymm2
+; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm5, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_fshr_v64i8:
; GFNIAVX512: # %bb.0:
-; GFNIAVX512-NEXT: vpsllw $6, %zmm0, %zmm2
-; GFNIAVX512-NEXT: vpsrlw $2, %zmm1, %zmm0
-; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
----------------
goldsteinn wrote:
likewise
https://github.com/llvm/llvm-project/pull/89115
More information about the llvm-commits mailing list