[llvm] [X86] SimplifyDemandedBitsForTargetNode - add handling for VPMADD52L/VPMADD52H (PR #155494)
    via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Tue Aug 26 22:04:47 PDT 2025
    
    
  
https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/155494
>From e4c5dc81809eca9de9c9d4ed86aaa0fb307354cc Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 27 Aug 2025 04:24:55 +0800
Subject: [PATCH 1/2] [X86][NFC] Add tests for pr155387
---
 llvm/test/CodeGen/X86/combine-vpmadd52.ll | 82 +++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/combine-vpmadd52.ll
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
new file mode 100644
index 0000000000000..907e34fcee31e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX
+
+define <2 x i64> @test_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsllq $52, %xmm1, %xmm1
+; AVX512-NEXT:    vpsllq $52, %xmm2, %xmm2
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test_vpmadd52l:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsllq $52, %xmm1, %xmm1
+; AVX-NEXT:    vpsllq $52, %xmm2, %xmm2
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %shl1 = shl <2 x i64> %x1, <i64 52, i64 52>
+  %shl2 = shl <2 x i64> %x2, <i64 52, i64 52>
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %shl1, <2 x i64> %shl2)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_wrong_shift(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_wrong_shift:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsllq $51, %xmm1, %xmm1
+; AVX512-NEXT:    vpsllq $51, %xmm2, %xmm2
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test_vpmadd52l_wrong_shift:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsllq $51, %xmm1, %xmm1
+; AVX-NEXT:    vpsllq $51, %xmm2, %xmm2
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %shl1 = shl <2 x i64> %x1, <i64 51, i64 51>
+  %shl2 = shl <2 x i64> %x2, <i64 51, i64 51>
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %shl1, <2 x i64> %shl2)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_wrong_op:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsllq $52, %xmm0, %xmm0
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test_vpmadd52l_wrong_op:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsllq $52, %xmm0, %xmm0
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %shl0 = shl <2 x i64> %x0, <i64 52, i64 52>
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %shl0, <2 x i64> %x1, <2 x i64> %x2)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52h:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsllq $52, %xmm1, %xmm1
+; AVX512-NEXT:    vpsllq $52, %xmm2, %xmm2
+; AVX512-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test_vpmadd52h:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsllq $52, %xmm1, %xmm1
+; AVX-NEXT:    vpsllq $52, %xmm2, %xmm2
+; AVX-NEXT:    {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %shl1 = shl <2 x i64> %x1, <i64 52, i64 52>
+  %shl2 = shl <2 x i64> %x2, <i64 52, i64 52>
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %shl1, <2 x i64> %shl2)
+  ret <2 x i64> %1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From f57cb12e5df7298050e6c2f4d5793a24771a400d Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 27 Aug 2025 04:26:37 +0800
Subject: [PATCH 2/2] [X86] SimplifyDemandedBitsForTargetNode - add handling
 for VPMADD52L/VPMADD52H
---
 llvm/lib/Target/X86/X86ISelLowering.cpp   | 33 +++++++++++++++++++++++
 llvm/test/CodeGen/X86/combine-vpmadd52.ll | 20 ++++++--------
 2 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 19131fbd4102b..8df2fcc5a2c1e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44957,6 +44957,24 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     Known.Zero.setLowBits(Known2.countMinTrailingZeros());
     return false;
   }
+  case X86ISD::VPMADD52L:
+  case X86ISD::VPMADD52H: {
+    KnownBits KnownOp0, KnownOp1;
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+    // Only demand the lower 52 bits of operands 0 / 1 (and all 64 bits of
+    // operand 2).
+    APInt Low52Bits = APInt::getLowBitsSet(BitWidth, 52);
+    if (SimplifyDemandedBits(Op0, Low52Bits, OriginalDemandedElts, KnownOp0,
+                             TLO, Depth + 1))
+      return true;
+
+    if (SimplifyDemandedBits(Op1, Low52Bits, OriginalDemandedElts, KnownOp1,
+                             TLO, Depth + 1))
+      return true;
+    // TODO: Compute the known bits for VPMADD52L/VPMADD52H.
+    break;
+  }
   }
 
   return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -60068,6 +60086,19 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Simplify VPMADD52L/VPMADD52H operations.
+static SDValue combineVPMADD52LH(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  MVT VT = N->getSimpleValueType(0);
+  unsigned NumEltBits = VT.getScalarSizeInBits();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits),
+                               DCI))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
 static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const X86Subtarget &Subtarget) {
@@ -60705,6 +60736,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::PMULUDQ:     return combinePMULDQ(N, DAG, DCI, Subtarget);
   case X86ISD::VPMADDUBSW:
   case X86ISD::VPMADDWD:    return combineVPMADD(N, DAG, DCI);
+  case X86ISD::VPMADD52L:
+  case X86ISD::VPMADD52H:    return combineVPMADD52LH(N, DAG, DCI);
   case X86ISD::KSHIFTL:
   case X86ISD::KSHIFTR:     return combineKSHIFT(N, DAG, DCI);
   case ISD::FP16_TO_FP:     return combineFP16_TO_FP(N, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
index 907e34fcee31e..4e6a8b8a07853 100644
--- a/llvm/test/CodeGen/X86/combine-vpmadd52.ll
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -5,16 +5,14 @@
 define <2 x i64> @test_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
 ; AVX512-LABEL: test_vpmadd52l:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllq $52, %xmm1, %xmm1
-; AVX512-NEXT:    vpsllq $52, %xmm2, %xmm2
-; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT:    vpmadd52luq %xmm1, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX-LABEL: test_vpmadd52l:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsllq $52, %xmm1, %xmm1
-; AVX-NEXT:    vpsllq $52, %xmm2, %xmm2
-; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    {vex} vpmadd52luq %xmm1, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %shl1 = shl <2 x i64> %x1, <i64 52, i64 52>
   %shl2 = shl <2 x i64> %x2, <i64 52, i64 52>
@@ -62,16 +60,14 @@ define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64
 define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
 ; AVX512-LABEL: test_vpmadd52h:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllq $52, %xmm1, %xmm1
-; AVX512-NEXT:    vpsllq $52, %xmm2, %xmm2
-; AVX512-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT:    vpmadd52huq %xmm1, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX-LABEL: test_vpmadd52h:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsllq $52, %xmm1, %xmm1
-; AVX-NEXT:    vpsllq $52, %xmm2, %xmm2
-; AVX-NEXT:    {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    {vex} vpmadd52huq %xmm1, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %shl1 = shl <2 x i64> %x1, <i64 52, i64 52>
   %shl2 = shl <2 x i64> %x2, <i64 52, i64 52>
    
    
More information about the llvm-commits
mailing list