[llvm] 20cdffb - [X86] combineConcatVectorOps - extend VPERMILPD handling to support 512-bit types
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 12:15:49 PDT 2025
Author: Simon Pilgrim
Date: 2025-03-17T19:15:37Z
New Revision: 20cdffbd2761b2cb33e0968a78de9a427d0ef555
URL: https://github.com/llvm/llvm-project/commit/20cdffbd2761b2cb33e0968a78de9a427d0ef555
DIFF: https://github.com/llvm/llvm-project/commit/20cdffbd2761b2cb33e0968a78de9a427d0ef555.diff
LOG: [X86] combineConcatVectorOps - extend VPERMILPD handling to support 512-bit types
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d3e6bff302600..ee2c72bc9cced 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58113,11 +58113,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
DAG.getNode(X86ISD::VPERMILPI, DL, FloatVT, Res, Op0.getOperand(1));
return DAG.getBitcast(VT, Res);
}
- // TODO: v8f64 VPERMILPI concatenation.
- if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
- uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
- uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
- uint64_t Idx = ((Idx1 & 3) << 2) | (Idx0 & 3);
+ if (!IsSplat && (VT == MVT::v4f64 || VT == MVT::v8f64)) {
+ unsigned NumSubElts = Op0.getValueType().getVectorNumElements();
+ uint64_t Mask = (1ULL << NumSubElts) - 1;
+ uint64_t Idx = 0;
+ for (unsigned I = 0; I != NumOps; ++I) {
+ uint64_t SubIdx = Ops[I].getConstantOperandVal(1);
+ Idx |= (SubIdx & Mask) << (I * NumSubElts);
+ }
return DAG.getNode(X86ISD::VPERMILPI, DL, VT,
ConcatSubOperand(VT, Ops, 0),
DAG.getTargetConstant(Idx, DL, MVT::i8));
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 37b2d1307b461..fce98cd470bcd 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1678,11 +1678,10 @@ define <8 x double> @concat_shuffle_v8f64_v2f64_10325476(<2 x double> %a0, <2 x
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512F-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX512F-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
-; AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: concat_shuffle_v8f64_v2f64_10325476:
@@ -1691,10 +1690,9 @@ define <8 x double> @concat_shuffle_v8f64_v2f64_10325476(<2 x double> %a0, <2 x
; AVX512F-32-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512F-32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512F-32-NEXT: vinsertf128 $1, {{[0-9]+}}(%esp), %ymm2, %ymm1
-; AVX512F-32-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1,0,3,2]
; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%s0 = shufflevector <2 x double> %a0, <2 x double> poison, <2 x i32> <i32 1, i32 0>
More information about the llvm-commits mailing list