[llvm] a03f064 - [X86] combineX86ShufflesRecursively - peek through one use bitcasts to find additional (free) extract_subvector nodes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 05:52:26 PST 2025
Author: Simon Pilgrim
Date: 2025-02-20T13:49:49Z
New Revision: a03f064b60f0ccfe9bf31046bc944087ed2fad6c
URL: https://github.com/llvm/llvm-project/commit/a03f064b60f0ccfe9bf31046bc944087ed2fad6c
DIFF: https://github.com/llvm/llvm-project/commit/a03f064b60f0ccfe9bf31046bc944087ed2fad6c.diff
LOG: [X86] combineX86ShufflesRecursively - peek through one use bitcasts to find additional (free) extract_subvector nodes
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b69674d9be4e4..1c9d43ce4c062 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41084,12 +41084,17 @@ static SDValue combineX86ShufflesRecursively(
}
}
- // Peek through any free extract_subvector nodes back to root size.
- for (SDValue &Op : Ops)
- while (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 &&
- isNullConstant(Op.getOperand(1)))
- Op = Op.getOperand(0);
+ // Peek through any free bitcasts/extract_subvector nodes back to root size.
+ for (SDValue &Op : Ops){
+ SDValue BC = Op;
+ if (BC.getOpcode() == ISD::BITCAST && BC.hasOneUse())
+ BC = peekThroughOneUseBitcasts(BC);
+ while (BC.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ (RootSizeInBits % BC.getOperand(0).getValueSizeInBits()) == 0 &&
+ isNullConstant(BC.getOperand(1))) {
+ Op = BC = BC.getOperand(0);
+ }
+ }
// Remove unused/repeated shuffle source ops.
resolveTargetShuffleInputsAndMask(Ops, Mask);
diff --git a/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll b/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
index 717d1e447e165..a768baae97add 100644
--- a/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
+++ b/llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
@@ -12867,46 +12867,25 @@ define void @mask_replication_factor8_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
-; AVX512BW-ONLY-LABEL: mask_replication_factor8_vf8:
-; AVX512BW-ONLY: # %bb.0:
-; AVX512BW-ONLY-NEXT: kmovq (%rdi), %k0
-; AVX512BW-ONLY-NEXT: vpmovm2b %k0, %zmm0
-; AVX512BW-ONLY-NEXT: vpbroadcastq %xmm0, %zmm0
-; AVX512BW-ONLY-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,54,54,54,54,54,54,54,54,55,55,55,55,55,55,55,55]
-; AVX512BW-ONLY-NEXT: vpmovb2m %zmm0, %k1
-; AVX512BW-ONLY-NEXT: kshiftrq $16, %k1, %k2
-; AVX512BW-ONLY-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
-; AVX512BW-ONLY-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
-; AVX512BW-ONLY-NEXT: kshiftrq $48, %k1, %k2
-; AVX512BW-ONLY-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
-; AVX512BW-ONLY-NEXT: kshiftrq $32, %k1, %k1
-; AVX512BW-ONLY-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
-; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm3, 128(%rdx)
-; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm2, 192(%rdx)
-; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm1, (%rdx)
-; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm0, 64(%rdx)
-; AVX512BW-ONLY-NEXT: vzeroupper
-; AVX512BW-ONLY-NEXT: retq
-;
-; AVX512VBMI-ONLY-LABEL: mask_replication_factor8_vf8:
-; AVX512VBMI-ONLY: # %bb.0:
-; AVX512VBMI-ONLY-NEXT: kmovq (%rdi), %k0
-; AVX512VBMI-ONLY-NEXT: vpmovm2b %k0, %zmm0
-; AVX512VBMI-ONLY-NEXT: vpmovsxbq %xmm0, %zmm0
-; AVX512VBMI-ONLY-NEXT: vpmovb2m %zmm0, %k1
-; AVX512VBMI-ONLY-NEXT: kshiftrq $16, %k1, %k2
-; AVX512VBMI-ONLY-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
-; AVX512VBMI-ONLY-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
-; AVX512VBMI-ONLY-NEXT: kshiftrq $48, %k1, %k2
-; AVX512VBMI-ONLY-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
-; AVX512VBMI-ONLY-NEXT: kshiftrq $32, %k1, %k1
-; AVX512VBMI-ONLY-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
-; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm3, 128(%rdx)
-; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm2, 192(%rdx)
-; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm1, (%rdx)
-; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm0, 64(%rdx)
-; AVX512VBMI-ONLY-NEXT: vzeroupper
-; AVX512VBMI-ONLY-NEXT: retq
+; AVX512BW-LABEL: mask_replication_factor8_vf8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: kmovq (%rdi), %k0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: vpmovsxbq %xmm0, %zmm0
+; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
+; AVX512BW-NEXT: kshiftrq $16, %k1, %k2
+; AVX512BW-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
+; AVX512BW-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
+; AVX512BW-NEXT: kshiftrq $48, %k1, %k2
+; AVX512BW-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
+; AVX512BW-NEXT: kshiftrq $32, %k1, %k1
+; AVX512BW-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vmovdqa64 %zmm3, 128(%rdx)
+; AVX512BW-NEXT: vmovdqa64 %zmm2, 192(%rdx)
+; AVX512BW-NEXT: vmovdqa64 %zmm1, (%rdx)
+; AVX512BW-NEXT: vmovdqa64 %zmm0, 64(%rdx)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%src.mask.padded = load <64 x i1>, ptr %in.maskvec, align 64
%src.mask = shufflevector <64 x i1> %src.mask.padded, <64 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%tgt.mask = shufflevector <8 x i1> %src.mask, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 29d80e16bb26e..42521b809b102 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -5117,15 +5117,11 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
; AVX512BW-FAST-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
; AVX512BW-FAST: # %bb.0:
; AVX512BW-FAST-NEXT: vmovdqa64 (%rdi), %zmm0
-; AVX512BW-FAST-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,13,14,15,0,1,2,3]
; AVX512BW-FAST-NEXT: vpaddb (%rsi), %zmm0, %zmm0
-; AVX512BW-FAST-NEXT: vpermd %zmm0, %zmm1, %zmm1
-; AVX512BW-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512BW-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5,6,7]
-; AVX512BW-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512BW-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; AVX512BW-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512BW-FAST-NEXT: vpaddb (%rdx), %zmm0, %zmm0
+; AVX512BW-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-FAST-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,13,14,15,0,21,22,23,0,25,26,27,28,29,30,31]
+; AVX512BW-FAST-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
+; AVX512BW-FAST-NEXT: vpaddb (%rdx), %zmm2, %zmm0
; AVX512BW-FAST-NEXT: vmovdqa64 %zmm0, (%rcx)
; AVX512BW-FAST-NEXT: vzeroupper
; AVX512BW-FAST-NEXT: retq
More information about the llvm-commits mailing list