[llvm] d43ec97 - [X86] combineConcatVectorOps - IsConcatFree - peek through bitcasts to find inplace subvectors.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 4 06:32:56 PDT 2024
Author: Simon Pilgrim
Date: 2024-07-04T14:32:39+01:00
New Revision: d43ec97de081755990264049eba09cb7c83cb321
URL: https://github.com/llvm/llvm-project/commit/d43ec97de081755990264049eba09cb7c83cb321
DIFF: https://github.com/llvm/llvm-project/commit/d43ec97de081755990264049eba09cb7c83cb321.diff
LOG: [X86] combineConcatVectorOps - IsConcatFree - peek through bitcasts to find inplace subvectors.
The EXTRACT_SUBVECTOR nodes don't have to be the same type; they just need to be at the correct bit offsets when concatenated back together.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vselect-avx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de26ce2853c5b..e03edf92cc478 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -56041,18 +56041,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
};
auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
bool AllConstants = true;
- bool AllSubVectors = true;
+ bool AllSubs = true;
+ unsigned VecSize = VT.getSizeInBits();
for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
- SDValue Sub = SubOps[I].getOperand(Op);
- unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- SDValue BC = peekThroughBitcasts(Sub);
+ SDValue BC = peekThroughBitcasts(SubOps[I].getOperand(Op));
+ unsigned SubSize = BC.getValueSizeInBits();
+ unsigned EltSize = BC.getScalarValueSizeInBits();
AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
- AllSubVectors &= Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- Sub.getOperand(0).getValueType() == VT &&
- Sub.getConstantOperandAPInt(1) == (I * NumSubElts);
+ AllSubs &= BC.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ BC.getOperand(0).getValueSizeInBits() == VecSize &&
+ (BC.getConstantOperandVal(1) * EltSize) == (I * SubSize);
}
- return AllConstants || AllSubVectors;
+ return AllConstants || AllSubs;
};
switch (Op0.getOpcode()) {
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index bd26948766a56..364390a4a60e5 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -259,7 +259,7 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
ret void
}
-; TODO: Concatenate 128-bit pblendvb back together on AVX2+ targets (hidden by SSE __m128i bitcasts)
+; Concatenate 128-bit pblendvb back together on AVX2+ targets (hidden by SSE __m128i bitcasts)
define <4 x i64> @vselect_concat_split_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; AVX1-LABEL: vselect_concat_split_v16i8:
; AVX1: ## %bb.0:
@@ -277,24 +277,13 @@ define <4 x i64> @vselect_concat_split_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i6
; AVX2-LABEL: vselect_concat_split_v16i8:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm3
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
-; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: vselect_concat_split_v16i8:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
-; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm4
-; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 killed $ymm1 def $ymm1
-; AVX512-NEXT: vpternlogq $226, %xmm0, %xmm2, %xmm1
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vpternlogq $226, %xmm0, %xmm3, %xmm4
-; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm0
+; AVX512-NEXT: vpternlogq $216, %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
%a.bc = bitcast <4 x i64> %a to <32 x i8>
%b.bc = bitcast <4 x i64> %b to <32 x i8>
More information about the llvm-commits
mailing list