[llvm] [X86] combineConcatVectorOps - add concatenation handling for X86ISD::VPERMILPV nodes (PR #132355)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 01:38:21 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Concatenate the X86ISD::VPERMILPV subvector nodes into a single wider node if we can merge either of the operands for free.
---
Full diff: https://github.com/llvm/llvm-project/pull/132355.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+11)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll (+2-3)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll (+13-17)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8287fb55f22ee..f6b5d4af5ba4e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58164,6 +58164,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
DAG.getTargetConstant(Idx, DL, MVT::i8));
}
break;
+ case X86ISD::VPERMILPV:
+ if (!IsSplat && (VT.is256BitVector() ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
+ SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
+ SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
+ if (Concat0 || Concat1)
+ return DAG.getNode(Opcode, DL, VT,
+ Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
+ Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
+ }
+ break;
case X86ISD::PSHUFB:
case X86ISD::PSADBW:
case X86ISD::VPMADDUBSW:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index e86ebe63e1f69..2df013d0ff3e3 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -674,10 +674,9 @@ define <8 x i32> @concat_self_v8i32(<4 x i32> %x) {
define <4 x double> @concat_vpermilvar_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1, <4 x i64> %m) {
; CHECK-LABEL: concat_vpermilvar_v4f64_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3
-; CHECK-NEXT: vpermilpd %xmm2, %xmm0, %xmm0
-; CHECK-NEXT: vpermilpd %xmm3, %xmm1, %xmm1
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vpermilpd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%m0 = shufflevector <4 x i64> %m, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
%m1 = shufflevector <4 x i64> %m, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index 6ffb3be1a7bc5..a28eba39685cb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -985,29 +985,26 @@ define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-64, %esp
; X86-NEXT: subl $64, %esp
-; X86-NEXT: vmovapd 8(%ebp), %xmm3
-; X86-NEXT: vpermilpd 72(%ebp), %xmm0, %xmm0
-; X86-NEXT: vpermilpd 88(%ebp), %xmm1, %xmm1
-; X86-NEXT: vpermilpd 104(%ebp), %xmm2, %xmm2
-; X86-NEXT: vpermilpd 120(%ebp), %xmm3, %xmm3
-; X86-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; X86-NEXT: vinsertf128 $1, 8(%ebp), %ymm2, %ymm1
+; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X86-NEXT: vpermilpd 72(%ebp), %zmm0, %zmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: concat_vpermilvar_v8f64_v2f64:
; X64: # %bb.0:
-; X64-NEXT: vextractf128 $1, %ymm4, %xmm5
-; X64-NEXT: vextractf32x4 $2, %zmm4, %xmm6
-; X64-NEXT: vextractf32x4 $3, %zmm4, %xmm7
-; X64-NEXT: vpermilpd %xmm4, %xmm0, %xmm0
-; X64-NEXT: vpermilpd %xmm5, %xmm1, %xmm1
-; X64-NEXT: vpermilpd %xmm6, %xmm2, %xmm2
-; X64-NEXT: vpermilpd %xmm7, %xmm3, %xmm3
+; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; X64-NEXT: vextractf32x4 $2, %zmm4, %xmm5
+; X64-NEXT: vextractf32x4 $3, %zmm4, %xmm6
+; X64-NEXT: vpermilpd %xmm5, %xmm2, %xmm2
+; X64-NEXT: vpermilpd %xmm6, %xmm3, %xmm3
; X64-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT: vpermilpd %ymm4, %ymm0, %ymm0
; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; X64-NEXT: retq
%m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
@@ -1027,10 +1024,9 @@ define <8 x double> @concat_vpermilvar_v8f64_v2f64(<2 x double> %a0, <2 x double
define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1, <8 x i64> %m) nounwind {
; CHECK-LABEL: concat_vpermilvar_v8f64_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
-; CHECK-NEXT: vpermilpd %ymm2, %ymm0, %ymm0
-; CHECK-NEXT: vpermilpd %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vpermilpd %zmm2, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%m0 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%m1 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
``````````
</details>
https://github.com/llvm/llvm-project/pull/132355
More information about the llvm-commits mailing list