[llvm] 140680c - [X86] Add peephole for (add (concat_vectors vpmaddwd, vpmaddwd)) -> vpdpwssd on VNNI targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 6 08:25:28 PST 2024


Author: Simon Pilgrim
Date: 2024-12-06T16:25:14Z
New Revision: 140680c5c8560f03b7a4b3f7db5a7f3c158b938a

URL: https://github.com/llvm/llvm-project/commit/140680c5c8560f03b7a4b3f7db5a7f3c158b938a
DIFF: https://github.com/llvm/llvm-project/commit/140680c5c8560f03b7a4b3f7db5a7f3c158b938a.diff

LOG: [X86] Add peephole for (add (concat_vectors vpmaddwd, vpmaddwd)) -> vpdpwssd on VNNI targets

Cleanup for #118433

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vpdpwssd.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ff21aa975033cf..f66a7a80d027ec 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -56847,6 +56847,23 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Peephole for 512-bit VPDPWSSD on non-VLX targets.
+  // TODO: Should this be part of matchPMADDWD/matchPMADDWD_2?
+  if (Subtarget.hasVNNI() && VT == MVT::v16i32) {
+    using namespace SDPatternMatch;
+    SDValue Accum, Lo0, Lo1, Hi0, Hi1;
+    if (sd_match(N, m_Add(m_Value(Accum),
+                          m_Node(ISD::CONCAT_VECTORS,
+                                 m_BinOp(X86ISD::VPMADDWD, m_Value(Lo0),
+                                         m_Value(Lo1)),
+                                 m_BinOp(X86ISD::VPMADDWD, m_Value(Hi0),
+                                         m_Value(Hi1)))))) {
+      return DAG.getNode(X86ISD::VPDPWSSD, DL, VT, Accum,
+                         concatSubVectors(Lo0, Hi0, DAG, DL),
+                         concatSubVectors(Lo1, Hi1, DAG, DL));
+    }
+  }
+
   // Fold ADD(ADC(Y,0,W),X) -> ADC(X,Y,W)
   if (Op0.getOpcode() == X86ISD::ADC && Op0->hasOneUse() &&
       X86::isZeroNode(Op0.getOperand(1))) {

diff  --git a/llvm/test/CodeGen/X86/vpdpwssd.ll b/llvm/test/CodeGen/X86/vpdpwssd.ll
index f7cd6f8f1b8961..2ac2b48af4ce70 100644
--- a/llvm/test/CodeGen/X86/vpdpwssd.ll
+++ b/llvm/test/CodeGen/X86/vpdpwssd.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,ZNVER,AVX512BW-VNNI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,ZNVER,AVX-VNNI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,AVX-VNNI
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512-VNNI
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
 
@@ -14,31 +14,11 @@ define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) {
 }
 
 define <16 x i32> @vpdpwssd_v16i32_accumulate(<32 x i16> %a0, <32 x i16> %a1, <16 x i32> %a2) {
-; ZNVER-LABEL: vpdpwssd_v16i32_accumulate:
-; ZNVER:       # %bb.0:
-; ZNVER-NEXT:    vpdpwssd %zmm1, %zmm0, %zmm2
-; ZNVER-NEXT:    vmovdqa64 %zmm2, %zmm0
-; ZNVER-NEXT:    retq
-;
-; AVX512-VNNI-LABEL: vpdpwssd_v16i32_accumulate:
-; AVX512-VNNI:       # %bb.0:
-; AVX512-VNNI-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
-; AVX512-VNNI-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
-; AVX512-VNNI-NEXT:    vpmaddwd %ymm3, %ymm4, %ymm3
-; AVX512-VNNI-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0
-; AVX512-VNNI-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
-; AVX512-VNNI-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
-; AVX512-VNNI-NEXT:    retq
-;
-; AVX512VL-VNNI-LABEL: vpdpwssd_v16i32_accumulate:
-; AVX512VL-VNNI:       # %bb.0:
-; AVX512VL-VNNI-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
-; AVX512VL-VNNI-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
-; AVX512VL-VNNI-NEXT:    vpmaddwd %ymm3, %ymm4, %ymm3
-; AVX512VL-VNNI-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0
-; AVX512VL-VNNI-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
-; AVX512VL-VNNI-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
-; AVX512VL-VNNI-NEXT:    retq
+; CHECK-LABEL: vpdpwssd_v16i32_accumulate:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpdpwssd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
+; CHECK-NEXT:    retq
   %x0 = sext <32 x i16> %a0 to <32 x i32>
   %x1 = sext <32 x i16> %a1 to <32 x i32>
   %m = mul nsw <32 x i32> %x0, %x1
@@ -50,11 +30,11 @@ define <16 x i32> @vpdpwssd_v16i32_accumulate(<32 x i16> %a0, <32 x i16> %a1, <1
 }
 
 define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x i32> %a2) {
-; AVX512BW-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
-; AVX512BW-VNNI:       # %bb.0:
-; AVX512BW-VNNI-NEXT:    vpdpwssd %ymm1, %ymm0, %ymm2
-; AVX512BW-VNNI-NEXT:    vmovdqa %ymm2, %ymm0
-; AVX512BW-VNNI-NEXT:    retq
+; AVX512VL-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
+; AVX512VL-VNNI:       # %bb.0:
+; AVX512VL-VNNI-NEXT:    vpdpwssd %ymm1, %ymm0, %ymm2
+; AVX512VL-VNNI-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512VL-VNNI-NEXT:    retq
 ;
 ; AVX-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
 ; AVX-VNNI:       # %bb.0:
@@ -67,12 +47,6 @@ define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x
 ; AVX512-VNNI-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0
 ; AVX512-VNNI-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
 ; AVX512-VNNI-NEXT:    retq
-;
-; AVX512VL-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
-; AVX512VL-VNNI:       # %bb.0:
-; AVX512VL-VNNI-NEXT:    vpdpwssd %ymm1, %ymm0, %ymm2
-; AVX512VL-VNNI-NEXT:    vmovdqa %ymm2, %ymm0
-; AVX512VL-VNNI-NEXT:    retq
   %x0 = sext <16 x i16> %a0 to <16 x i32>
   %x1 = sext <16 x i16> %a1 to <16 x i32>
   %m = mul nsw <16 x i32> %x0, %x1
@@ -84,11 +58,11 @@ define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x
 }
 
 define <4 x i32> @vpdpwssd_v4i32_accumulate(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
-; AVX512BW-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
-; AVX512BW-VNNI:       # %bb.0:
-; AVX512BW-VNNI-NEXT:    vpdpwssd %xmm1, %xmm0, %xmm2
-; AVX512BW-VNNI-NEXT:    vmovdqa %xmm2, %xmm0
-; AVX512BW-VNNI-NEXT:    retq
+; AVX512VL-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
+; AVX512VL-VNNI:       # %bb.0:
+; AVX512VL-VNNI-NEXT:    vpdpwssd %xmm1, %xmm0, %xmm2
+; AVX512VL-VNNI-NEXT:    vmovdqa %xmm2, %xmm0
+; AVX512VL-VNNI-NEXT:    retq
 ;
 ; AVX-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
 ; AVX-VNNI:       # %bb.0:
@@ -101,12 +75,6 @@ define <4 x i32> @vpdpwssd_v4i32_accumulate(<8 x i16> %a0, <8 x i16> %a1, <4 x i
 ; AVX512-VNNI-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
 ; AVX512-VNNI-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
 ; AVX512-VNNI-NEXT:    retq
-;
-; AVX512VL-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
-; AVX512VL-VNNI:       # %bb.0:
-; AVX512VL-VNNI-NEXT:    vpdpwssd %xmm1, %xmm0, %xmm2
-; AVX512VL-VNNI-NEXT:    vmovdqa %xmm2, %xmm0
-; AVX512VL-VNNI-NEXT:    retq
   %x0 = sext <8 x i16> %a0 to <8 x i32>
   %x1 = sext <8 x i16> %a1 to <8 x i32>
   %m = mul nsw <8 x i32> %x0, %x1


        


More information about the llvm-commits mailing list