[llvm] b172c7e - [X86] combineConcatVectorOps - fold concat(GF2P8AFFINEQB(x,y,c),GF2P8AFFINEQB(z,w,c)) -> GF2P8AFFINEQB(concat(x,z),concat(y,w),c)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 31 05:29:41 PDT 2022
Author: Simon Pilgrim
Date: 2022-10-31T12:27:57Z
New Revision: b172c7e1933b04f459b813a6c894d90ea65a9cd4
URL: https://github.com/llvm/llvm-project/commit/b172c7e1933b04f459b813a6c894d90ea65a9cd4
DIFF: https://github.com/llvm/llvm-project/commit/b172c7e1933b04f459b813a6c894d90ea65a9cd4.diff
LOG: [X86] combineConcatVectorOps - fold concat(GF2P8AFFINEQB(x,y,c),GF2P8AFFINEQB(z,w,c)) -> GF2P8AFFINEQB(concat(x,z),concat(y,w),c)
Pulled out of D137026
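For context: VGF2P8AFFINEQB applies an 8x8 GF(2) bit matrix to each 8-byte lane independently, so two 128-bit ops that share the same immediate operand can be rewritten as one wider op on the concatenated source and concatenated (here: broadcast) matrix operands, which is what the new combine does. Below is a minimal standalone sketch of that equivalence using the public GFNI intrinsics; it is not part of the patch, and it assumes a GFNI+AVX2 toolchain and CPU (build with -mgfni -mavx2). The 9241421688590303745 constant that now gets broadcast in the test checks is 0x8040201008040201, the matrix that reverses the bits within each byte.

// Standalone sketch, not part of this patch: demonstrates the lane-wise
// equivalence the combine relies on, using the public GFNI intrinsics.
// Build with: clang++ -O2 -mgfni -mavx2 gfni_concat.cpp (hypothetical file name).
// Running it needs a CPU with GFNI+AVX (e.g. Ice Lake / Zen 4 or newer).
#include <immintrin.h>
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t Buf[32];
  for (int I = 0; I != 32; ++I)
    Buf[I] = uint8_t(I * 37 + 11);
  __m256i X = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(Buf));

  // 0x8040201008040201 (9241421688590303745 in the test checks) is the
  // matrix that reverses the bits within each byte.
  const long long BitRev = (long long)0x8040201008040201ULL;
  const __m128i M128 = _mm_set1_epi64x(BitRev);
  const __m256i M256 = _mm256_set1_epi64x(BitRev);

  // concat(GF2P8AFFINEQB(lo, M, 0), GF2P8AFFINEQB(hi, M, 0)) ...
  __m128i Lo = _mm_gf2p8affine_epi64_epi8(_mm256_castsi256_si128(X), M128, 0);
  __m128i Hi =
      _mm_gf2p8affine_epi64_epi8(_mm256_extracti128_si256(X, 1), M128, 0);
  __m256i Narrow = _mm256_set_m128i(Hi, Lo);

  // ... gives the same bytes as GF2P8AFFINEQB(concat(lo, hi), broadcast(M), 0).
  __m256i Wide = _mm256_gf2p8affine_epi64_epi8(X, M256, 0);

  assert(std::memcmp(&Narrow, &Wide, sizeof(Narrow)) == 0);
  return 0;
}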
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-bitreverse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9fd07a7301bb3..c5a591a23f3cf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54685,6 +54685,18 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
ConcatSubOperand(SrcVT, Ops, 1));
}
break;
+    case X86ISD::GF2P8AFFINEQB:
+      if (!IsSplat &&
+          (VT.is256BitVector() ||
+           (VT.is512BitVector() && Subtarget.useBWIRegs())) &&
+          llvm::all_of(Ops, [Op0](SDValue Op) {
+            return Op0.getOperand(2) == Op.getOperand(2);
+          })) {
+        return DAG.getNode(Op0.getOpcode(), DL, VT,
+                           ConcatSubOperand(VT, Ops, 0),
+                           ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
+      }
+      break;
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index 73145d8f876ef..5466ec9af5761 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -873,11 +873,7 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
;
; GFNIAVX1-LABEL: test_bitreverse_v32i8:
; GFNIAVX1: # %bb.0:
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm2, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm2, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_v32i8:
@@ -1060,11 +1056,9 @@ define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_v16i16:
@@ -1258,11 +1252,9 @@ define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_v8i32:
@@ -1460,11 +1452,9 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_v4i64:
@@ -1733,15 +1723,9 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
;
; GFNIAVX1-LABEL: test_bitreverse_v64i8:
; GFNIAVX1: # %bb.0:
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm3, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_v64i8:
@@ -2059,17 +2043,15 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9241421688590303745,9241421688590303745]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_v32i16:
@@ -2411,17 +2393,15 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9241421688590303745,9241421688590303745]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_v16i32:
@@ -2771,17 +2751,15 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [9241421688590303745,9241421688590303745]
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm2, %xmm2
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
+; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vgf2p8affineqb $0, %xmm4, %xmm1, %xmm1
-; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: test_bitreverse_v8i64: