[llvm] 8a5aea7 - [X86][AVX] Fold extract_subvector(subv_broadcast(x),c) -> (x)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun May 24 10:49:57 PDT 2020
Author: Simon Pilgrim
Date: 2020-05-24T18:49:39+01:00
New Revision: 8a5aea7b50429cd4a459511286a7a9f1a7f4f5e2
URL: https://github.com/llvm/llvm-project/commit/8a5aea7b50429cd4a459511286a7a9f1a7f4f5e2
DIFF: https://github.com/llvm/llvm-project/commit/8a5aea7b50429cd4a459511286a7a9f1a7f4f5e2.diff
LOG: [X86][AVX] Fold extract_subvector(subv_broadcast(x),c) -> (x)
If we're extracting a subvector from a broadcasted subvector of the same type, then we can use the source vector directly.
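A minimal sketch of the pattern this targets (hypothetical function name; it mirrors test_broadcast_16i16_32i16 in the test diff below). Compiled with something like llc -mtriple=x86_64-- -mattr=+avx512vl, the <32 x i16> add is not legal without AVX512BW, so it is split into two <16 x i16> halves; the upper half becomes extract_subvector(SUBV_BROADCAST(load)), which now folds straight back to the loaded <16 x i16> source instead of emitting a vextracti64x4:

define <32 x i16> @broadcast_16i16_32i16_sketch(<16 x i16>* %p) nounwind {
  %v = load <16 x i16>, <16 x i16>* %p
  ; Broadcast the 256-bit subvector into both halves of a 512-bit vector;
  ; this lowers to X86ISD::SUBV_BROADCAST.
  %b = shufflevector <16 x i16> %v, <16 x i16> undef,
         <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                     i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
                     i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                     i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; A non-uniform constant keeps the two 256-bit halves distinct after the
  ; type legalizer splits the illegal <32 x i16> add.
  %r = add <32 x i16> %b, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7,
                           i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15,
                           i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23,
                           i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>
  ret <32 x i16> %r
}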
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512-vbroadcasti256.ll
llvm/test/CodeGen/X86/oddshuffles.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4a4f4a1e9eca..5fc8448d1e72 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47297,6 +47297,11 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
InVec.getOpcode() == X86ISD::VBROADCAST_LOAD))
return extractSubVector(InVec, 0, DAG, SDLoc(N), VT.getSizeInBits());
+ // If we're extracting a broadcasted subvector, just use the source.
+ if (InVec.getOpcode() == X86ISD::SUBV_BROADCAST &&
+ InVec.getOperand(0).getValueType() == VT)
+ return InVec.getOperand(0);
+
// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
if (IdxVal == 0 && InVec.hasOneUse()) {
diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcasti256.ll b/llvm/test/CodeGen/X86/avx512-vbroadcasti256.ll
index aed8b680e406..33b3ddba0fcc 100644
--- a/llvm/test/CodeGen/X86/avx512-vbroadcasti256.ll
+++ b/llvm/test/CodeGen/X86/avx512-vbroadcasti256.ll
@@ -54,9 +54,8 @@ define <16 x i32> @test_broadcast_8i32_16i32(<8 x i32> *%p) nounwind {
define <32 x i16> @test_broadcast_16i16_32i16(<16 x i16> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_16i16_32i16:
; X64-AVX512VL: ## %bb.0:
-; X64-AVX512VL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm1
-; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512VL-NEXT: retq
@@ -69,9 +68,8 @@ define <32 x i16> @test_broadcast_16i16_32i16(<16 x i16> *%p) nounwind {
;
; X64-AVX512DQVL-LABEL: test_broadcast_16i16_32i16:
; X64-AVX512DQVL: ## %bb.0:
-; X64-AVX512DQVL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %ymm0
; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm1
-; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512DQVL-NEXT: retq
@@ -84,9 +82,8 @@ define <32 x i16> @test_broadcast_16i16_32i16(<16 x i16> *%p) nounwind {
define <64 x i8> @test_broadcast_32i8_64i8(<32 x i8> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_32i8_64i8:
; X64-AVX512VL: ## %bb.0:
-; X64-AVX512VL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm1
-; X64-AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512VL-NEXT: retq
@@ -99,9 +96,8 @@ define <64 x i8> @test_broadcast_32i8_64i8(<32 x i8> *%p) nounwind {
;
; X64-AVX512DQVL-LABEL: test_broadcast_32i8_64i8:
; X64-AVX512DQVL: ## %bb.0:
-; X64-AVX512DQVL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %ymm0
; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm1
-; X64-AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X64-AVX512DQVL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 6bdc4e3d4478..03f6b5266516 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -1146,22 +1146,22 @@ define void @interleave_24i16_in(<24 x i16>* %p, <8 x i16>* %q1, <8 x i16>* %q2,
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqu (%rsi), %xmm0
; AVX2-NEXT: vmovdqu (%rdx), %xmm1
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm2
-; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = mem[0,1,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm4 = ymm2[0,1,2,3,6,7,2,3,8,9,8,9,4,5,6,7,16,17,18,19,22,23,18,19,24,25,24,25,20,21,22,23]
-; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,0,1,u,u,u,u,2,3,u,u,u,u,4,5,u,u,22,23,u,u,u,u,24,25,u,u,u,u,26,27]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm4[0],ymm2[1],ymm4[2,3],ymm2[4],ymm4[5,6],ymm2[7],ymm4[8],ymm2[9],ymm4[10,11],ymm2[12],ymm4[13,14],ymm2[15]
+; AVX2-NEXT: vmovdqu (%rcx), %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3
+; AVX2-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[0,1,2,3,6,7,2,3,8,9,8,9,4,5,6,7,16,17,18,19,22,23,18,19,24,25,24,25,20,21,22,23]
+; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,0,1,u,u,u,u,2,3,u,u,u,u,4,5,u,u,22,23,u,u,u,u,24,25,u,u,u,u,26,27]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7],ymm4[8],ymm3[9],ymm4[10,11],ymm3[12],ymm4[13,14],ymm3[15]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = <u,0,0,u,1,1,u,2>
-; AVX2-NEXT: vpermd %ymm3, %ymm4, %ymm4
+; AVX2-NEXT: vpermd %ymm2, %ymm4, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255]
-; AVX2-NEXT: vpblendvb %ymm5, %ymm2, %ymm4, %ymm2
+; AVX2-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,10,11,8,9,8,9,14,15,12,13,14,15]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[2,2,3,3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7]
; AVX2-NEXT: vmovdqu %xmm0, 32(%rdi)
-; AVX2-NEXT: vmovdqu %ymm2, (%rdi)
+; AVX2-NEXT: vmovdqu %ymm3, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
More information about the llvm-commits mailing list