[llvm] 3382119 - [X86] combineConcatVectorOps - convert ADD/SUB/MUL concatenation to use combineConcatVectorOps recursion

Mon Mar 10 09:16:52 PDT 2025

Author: Simon Pilgrim
Date: 2025-03-10T16:15:36Z
New Revision: 338211929ce8626aa31b542e79cb5eb7722d0aa4

URL: https://github.com/llvm/llvm-project/commit/338211929ce8626aa31b542e79cb5eb7722d0aa4
DIFF: https://github.com/llvm/llvm-project/commit/338211929ce8626aa31b542e79cb5eb7722d0aa4.diff

LOG: [X86] combineConcatVectorOps - convert ADD/SUB/MUL concatenation to use combineConcatVectorOps recursion

Only concatenate ADD/SUB/MUL nodes if at least one operand is beneficial to concatenate

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c3fae7d90348c..aec071f038dc6 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58388,9 +58388,12 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
       if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
                        (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
                         (EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
-        return DAG.getNode(Op0.getOpcode(), DL, VT,
-                           ConcatSubOperand(VT, Ops, 0),
-                           ConcatSubOperand(VT, Ops, 1));
+        SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
+        SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
+        if (Concat0 || Concat1)
+          return DAG.getNode(Op0.getOpcode(), DL, VT,
+                             Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
+                             Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
       }
       break;
     // Due to VADD, VSUB, VMUL can executed on more ports than VINSERT and

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 915c186de9ceb..4bf8663b9ee09 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -775,15 +775,13 @@ define <32 x i8> @combine_pshufb_pshufb_or_pshufb(<32 x i8> %a0) {
   ret <32 x i8> %4
 }
 
-; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpaddb
+; Not beneficial to concatenate both inputs just to create a 256-bit vpaddb
 define <32 x i8> @concat_add_unnecessary(<16 x i8> %a0, <16 x i8> noundef %a1, <16 x i8> %a2) nounwind {
 ; CHECK-LABEL: concat_add_unnecessary:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
-; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
+; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %lo = add <16 x i8> %a0, %a1
   %hi = add <16 x i8> %a0, %a2
@@ -791,15 +789,13 @@ define <32 x i8> @concat_add_unnecessary(<16 x i8> %a0, <16 x i8> noundef %a1, <
   ret <32 x i8> %res
 }
 
-; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpmullw
+; Not beneficial to concatenate both inputs just to create a 256-bit vpmullw
 define <16 x i16> @concat_mul_unnecessary(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) nounwind {
 ; CHECK-LABEL: concat_mul_unnecessary:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
-; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm1
+; CHECK-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %lo = mul <8 x i16> %a0, %a1
   %hi = mul <8 x i16> %a0, %a2