[llvm] [X86] SimplifyDemandedBitsForTargetNode - add handling for VPMADD52L/VPMADD52H (PR #155494)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 03:14:01 PDT 2025
https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/155494
From f7b02238dfd71a5520e5d6a0fad7ab9185ae96df Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 27 Aug 2025 04:24:55 +0800
Subject: [PATCH 1/2] [X86][NFC] Add tests for pr155387
---
llvm/test/CodeGen/X86/combine-vpmadd52.ll | 116 ++++++++++++++++++++++
1 file changed, 116 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/combine-vpmadd52.ll
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
new file mode 100644
index 0000000000000..2ae33fca726a7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX
+
+define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test1_vpmadd52l:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test1_vpmadd52l:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test2_vpmadd52l:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test2_vpmadd52l:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %and)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_vpmadd52l:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test3_vpmadd52l:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_wrong_bits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_wrong_bits:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52l_wrong_bits:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x1, splat (i64 2251799813685247) ; (1LL << 51) - 1
+ %or = or <2 x i64> %x2, splat (i64 2251799813685248) ; 1LL << 51
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_wrong_op:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52l_wrong_op:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %and, <2 x i64> %x1, <2 x i64> %x2)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52h:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
+; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test_vpmadd52h:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+ %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
+ %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+ ret <2 x i64> %1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
From b258fd6f8f1e7ab15d0c1f3122f1a424a377ccd4 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 27 Aug 2025 04:26:37 +0800
Subject: [PATCH 2/2] [X86] SimplifyDemandedBitsForTargetNode - add handling
for VPMADD52L/VPMADD52H
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 33 +++++++++++++++++++++++
llvm/test/CodeGen/X86/combine-vpmadd52.ll | 14 ++--------
2 files changed, 35 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 19131fbd4102b..8df2fcc5a2c1e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44957,6 +44957,24 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
Known.Zero.setLowBits(Known2.countMinTrailingZeros());
return false;
}
+ case X86ISD::VPMADD52L:
+ case X86ISD::VPMADD52H: {
+ KnownBits KnownOp0, KnownOp1;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ // Only demand the lower 52-bits of operands 0 / 1 (and all 64-bits of
+ // operand 2).
+ APInt Low52Bits = APInt::getLowBitsSet(BitWidth, 52);
+ if (SimplifyDemandedBits(Op0, Low52Bits, OriginalDemandedElts, KnownOp0,
+ TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op1, Low52Bits, OriginalDemandedElts, KnownOp1,
+ TLO, Depth + 1))
+ return true;
+ // TODO: Compute the known bits for VPMADD52L/VPMADD52H.
+ break;
+ }
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -60068,6 +60086,19 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Simplify VPMADD52L/VPMADD52H operations.
+static SDValue combineVPMADD52LH(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ MVT VT = N->getSimpleValueType(0);
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits),
+ DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -60705,6 +60736,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
case X86ISD::VPMADDUBSW:
case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI);
+ case X86ISD::VPMADD52L:
+ case X86ISD::VPMADD52H: return combineVPMADD52LH(N, DAG, DCI);
case X86ISD::KSHIFTL:
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
index 2ae33fca726a7..004db995ee584 100644
--- a/llvm/test/CodeGen/X86/combine-vpmadd52.ll
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -5,15 +5,14 @@
define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test1_vpmadd52l:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
;
; AVX-LABEL: test1_vpmadd52l:
; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT: retq
+
%and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2)
ret <2 x i64> %1
@@ -22,13 +21,11 @@ define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test2_vpmadd52l:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
;
; AVX-LABEL: test2_vpmadd52l:
; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT: retq
%and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1
@@ -39,15 +36,11 @@ define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test3_vpmadd52l:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
;
; AVX-LABEL: test3_vpmadd52l:
; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT: retq
%and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
@@ -96,17 +89,14 @@ define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64
define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test_vpmadd52h:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
-; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
;
; AVX-LABEL: test_vpmadd52h:
; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
; AVX-NEXT: retq
+
%and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
%or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
More information about the llvm-commits
mailing list