[llvm] 3382119 - [X86] combineConcatVectorOps - convert ADD/SUB/MUL concatenation to use combineConcatVectorOps recursion
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 09:16:52 PDT 2025
Author: Simon Pilgrim
Date: 2025-03-10T16:15:36Z
New Revision: 338211929ce8626aa31b542e79cb5eb7722d0aa4
URL: https://github.com/llvm/llvm-project/commit/338211929ce8626aa31b542e79cb5eb7722d0aa4
DIFF: https://github.com/llvm/llvm-project/commit/338211929ce8626aa31b542e79cb5eb7722d0aa4.diff
LOG: [X86] combineConcatVectorOps - convert ADD/SUB/MUL concatenation to use combineConcatVectorOps recursion
Only concatenate ADD/SUB/MUL nodes if at least one operand is beneficial to concatenate
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c3fae7d90348c..aec071f038dc6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58388,9 +58388,12 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs() &&
(EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
- return DAG.getNode(Op0.getOpcode(), DL, VT,
- ConcatSubOperand(VT, Ops, 0),
- ConcatSubOperand(VT, Ops, 1));
+ SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
+ SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
+ if (Concat0 || Concat1)
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
+ Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
}
break;
// Due to VADD, VSUB, VMUL can executed on more ports than VINSERT and
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 915c186de9ceb..4bf8663b9ee09 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -775,15 +775,13 @@ define <32 x i8> @combine_pshufb_pshufb_or_pshufb(<32 x i8> %a0) {
ret <32 x i8> %4
}
-; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpaddb
+; Not beneficial to concatenate both inputs just to create a 256-bit vpaddb
define <32 x i8> @concat_add_unnecessary(<16 x i8> %a0, <16 x i8> noundef %a1, <16 x i8> %a2) nounwind {
; CHECK-LABEL: concat_add_unnecessary:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm1
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%lo = add <16 x i8> %a0, %a1
%hi = add <16 x i8> %a0, %a2
@@ -791,15 +789,13 @@ define <32 x i8> @concat_add_unnecessary(<16 x i8> %a0, <16 x i8> noundef %a1, <
ret <32 x i8> %res
}
-; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpmullw
+; Not beneficial to concatenate both inputs just to create a 256-bit vpmullw
define <16 x i16> @concat_mul_unnecessary(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) nounwind {
; CHECK-LABEL: concat_mul_unnecessary:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm1
+; CHECK-NEXT: vpmullw %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%lo = mul <8 x i16> %a0, %a1
%hi = mul <8 x i16> %a0, %a2
More information about the llvm-commits
mailing list