[llvm] c4eec9e - [X86] combineConcatVectorOps - add concatenation handling for consecutive extracts of upper subvectors (#132389)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 07:22:53 PDT 2025
Author: Simon Pilgrim
Date: 2025-03-21T14:22:49Z
New Revision: c4eec9eb52c604c1d54d86181bcce92f3cbc61ec
URL: https://github.com/llvm/llvm-project/commit/c4eec9eb52c604c1d54d86181bcce92f3cbc61ec
DIFF: https://github.com/llvm/llvm-project/commit/c4eec9eb52c604c1d54d86181bcce92f3cbc61ec.diff
LOG: [X86] combineConcatVectorOps - add concatenation handling for consecutive extracts of upper subvectors (#132389)
We already fold concat(extract_subvector(x,0), extract_subvector(x,numsubelts)) -> (wider lower half) extract_subvector(x,0).
This patch extends that handling to the concat(extract_subvector(x,c), extract_subvector(x,c+numsubelts)) -> (wider upper half) extract_subvector(x,c) case as well.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/matrix-multiply.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f6b5d4af5ba4e..4f4254ce6fb0e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57955,13 +57955,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
DAG.getBitcast(VT, Src1.getOperand(0)),
DAG.getTargetConstant(0x31, DL, MVT::i8));
}
- // concat(extract_subvector(x,lo), extract_subvector(x,hi)) -> x.
+ // Widen extract_subvector
+ // concat(extract_subvector(x,lo), extract_subvector(x,hi))
+ // --> extract_subvector(x,lo)
+ unsigned NumSubElts0 = Src0.getValueType().getVectorNumElements();
if (Src0.getOperand(0) == Src1.getOperand(0) &&
- Src0.getConstantOperandAPInt(1) == 0 &&
+ (Src0.getConstantOperandAPInt(1) == 0 ||
+ Src0.getConstantOperandAPInt(1) == (NumSrcElts0 / 2)) &&
Src1.getConstantOperandAPInt(1) ==
- Src0.getValueType().getVectorNumElements()) {
- return DAG.getBitcast(VT, extractSubVector(Src0.getOperand(0), 0, DAG,
- DL, VT.getSizeInBits()));
+ (Src0.getConstantOperandAPInt(1) + NumSubElts0)) {
+ return DAG.getBitcast(VT,
+ extractSubVector(Src0.getOperand(0),
+ Src0.getConstantOperandVal(1),
+ DAG, DL, VT.getSizeInBits()));
}
}
}
diff --git a/llvm/test/CodeGen/X86/matrix-multiply.ll b/llvm/test/CodeGen/X86/matrix-multiply.ll
index 9763893d3daa2..9b8816bd11f70 100644
--- a/llvm/test/CodeGen/X86/matrix-multiply.ll
+++ b/llvm/test/CodeGen/X86/matrix-multiply.ll
@@ -1036,10 +1036,6 @@ define <16 x float> @test_mul4x4_f32(<16 x float> %a0, <16 x float> %a1) nounwin
;
; AVX512-LABEL: test_mul4x4_f32:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
-; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm3
-; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512-NEXT: vshufps {{.*#+}} zmm2 = zmm1[1,1,1,1,5,5,5,5,9,9,9,9,13,13,13,13]
; AVX512-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[2,3,2,3,2,3,2,3]
; AVX512-NEXT: vmulps %zmm2, %zmm3, %zmm2
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index a28eba39685cb..400a3e835307f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -997,15 +997,12 @@ define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double
;
; X64-LABEL: concat_vpermilvar_v8f64_v2f64:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; X64-NEXT: vextractf32x4 $2, %zmm4, %xmm5
-; X64-NEXT: vextractf32x4 $3, %zmm4, %xmm6
-; X64-NEXT: vpermilpd %xmm5, %xmm2, %xmm2
-; X64-NEXT: vpermilpd %xmm6, %xmm3, %xmm3
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; X64-NEXT: vpermilpd %zmm4, %zmm0, %zmm0
; X64-NEXT: retq
%m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
%m1 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
More information about the llvm-commits
mailing list