[llvm] 90ec51a - [X86] combineConcatVectorOps - fold 512-bit concat(GF2P8AFFINEQB(x,y,c),GF2P8AFFINEQB(z,w,c)) -> GF2P8AFFINEQB(concat(x,z),concat(y,w),c)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 1 05:07:12 PDT 2022
Author: Simon Pilgrim
Date: 2022-11-01T12:06:46Z
New Revision: 90ec51a9ab87c4b0a2a145425be8068f6edebeaf
URL: https://github.com/llvm/llvm-project/commit/90ec51a9ab87c4b0a2a145425be8068f6edebeaf
DIFF: https://github.com/llvm/llvm-project/commit/90ec51a9ab87c4b0a2a145425be8068f6edebeaf.diff
LOG: [X86] combineConcatVectorOps - fold 512-bit concat(GF2P8AFFINEQB(x,y,c),GF2P8AFFINEQB(z,w,c)) -> GF2P8AFFINEQB(concat(x,z),concat(y,w),c)
Now that D137036 has landed, we only need AVX512F support (rather than AVX512BW) to generate 512-bit GF2P8AFFINEQB ops.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-bitreverse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eae7164e819a6..e32b0cb3d45e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54698,7 +54698,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
case X86ISD::GF2P8AFFINEQB:
if (!IsSplat &&
(VT.is256BitVector() ||
- (VT.is512BitVector() && Subtarget.useBWIRegs())) &&
+ (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
llvm::all_of(Ops, [Op0](SDValue Op) {
return Op0.getOperand(2) == Op.getOperand(2);
})) {
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index 5466ec9af5761..b592a60cd44eb 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -1735,19 +1735,10 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
; GFNIAVX2-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
; GFNIAVX2-NEXT: retq
;
-; GFNIAVX512F-LABEL: test_bitreverse_v64i8:
-; GFNIAVX512F: # %bb.0:
-; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
-; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
-; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
-; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; GFNIAVX512F-NEXT: retq
-;
-; GFNIAVX512BW-LABEL: test_bitreverse_v64i8:
-; GFNIAVX512BW: # %bb.0:
-; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
-; GFNIAVX512BW-NEXT: retq
+; GFNIAVX512-LABEL: test_bitreverse_v64i8:
+; GFNIAVX512: # %bb.0:
+; GFNIAVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; GFNIAVX512-NEXT: retq
%b = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a)
ret <64 x i8> %b
}
@@ -2069,11 +2060,9 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; GFNIAVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1
-; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
-; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1
; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
-; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; GFNIAVX512F-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; GFNIAVX512F-NEXT: retq
;
; GFNIAVX512BW-LABEL: test_bitreverse_v32i16:
@@ -2419,11 +2408,9 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; GFNIAVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1
-; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
-; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1
; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
-; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; GFNIAVX512F-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; GFNIAVX512F-NEXT: retq
;
; GFNIAVX512BW-LABEL: test_bitreverse_v16i32:
@@ -2777,11 +2764,9 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
; GFNIAVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; GFNIAVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1
-; GFNIAVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
-; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm1, %ymm1
; GFNIAVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
-; GFNIAVX512F-NEXT: vgf2p8affineqb $0, %ymm3, %ymm0, %ymm0
; GFNIAVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; GFNIAVX512F-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; GFNIAVX512F-NEXT: retq
;
; GFNIAVX512BW-LABEL: test_bitreverse_v8i64:
More information about the llvm-commits mailing list