[llvm] dc6a915 - [X86] X86TargetLowering::computeKnownBitsForTargetNode - add X86ISD::VPMADD52L/H handling - again (#159230)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 22 02:46:48 PDT 2025
Author: 黃國庭
Date: 2025-09-22T09:46:44Z
New Revision: dc6a9151ad14a5fe4cdbca60f5cb6deb77668524
URL: https://github.com/llvm/llvm-project/commit/dc6a9151ad14a5fe4cdbca60f5cb6deb77668524
DIFF: https://github.com/llvm/llvm-project/commit/dc6a9151ad14a5fe4cdbca60f5cb6deb77668524.diff
LOG: [X86] X86TargetLowering::computeKnownBitsForTargetNode - add X86ISD::VPMADD52L/H handling - again (#159230)
FIX #155386
Added:
llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0837e1e3feb1d..2feb76e0eb7b4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38999,6 +38999,26 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
+ case X86ISD::VPMADD52L:
+ case X86ISD::VPMADD52H: {
+ assert(Op.getValueType().isVector() &&
+ Op.getValueType().getScalarType() == MVT::i64 &&
+ "Unexpected VPMADD52 type");
+ KnownBits K0 =
+ DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ KnownBits K1 =
+ DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ KnownBits KAcc =
+ DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ K0 = K0.trunc(52);
+ K1 = K1.trunc(52);
+ KnownBits KnownMul = (Op.getOpcode() == X86ISD::VPMADD52L)
+ ? KnownBits::mul(K0, K1)
+ : KnownBits::mulhu(K0, K1);
+ KnownMul = KnownMul.zext(64);
+ Known = KnownBits::add(KAcc, KnownMul);
+ return;
+ }
}
// Handle target shuffles.
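
For reference, here is a minimal standalone sketch (not part of the commit) of the per-lane semantics the new case models, assuming a compiler that provides unsigned __int128; the helper names vpmadd52l_ref/vpmadd52h_ref and Mask52 are illustrative only. VPMADD52L adds the low 52 bits of the 104-bit product of the low 52 bits of each multiplicand to the 64-bit accumulator, and VPMADD52H adds bits [103:52] of that product, which is why the hunk above truncates both multiplicand known bits to 52 bits, applies KnownBits::mul or KnownBits::mulhu, zero-extends back to 64 bits, and adds the accumulator's known bits.

#include <cassert>
#include <cstdint>

static constexpr uint64_t Mask52 = (1ULL << 52) - 1;

// VPMADD52LUQ, one 64-bit lane: acc + low 52 bits of the 104-bit product
// of the low 52 bits of each multiplicand (full 64-bit wrapping add).
static uint64_t vpmadd52l_ref(uint64_t Acc, uint64_t A, uint64_t B) {
  unsigned __int128 Prod =
      (unsigned __int128)(A & Mask52) * (unsigned __int128)(B & Mask52);
  return Acc + (uint64_t)(Prod & Mask52);
}

// VPMADD52HUQ, one 64-bit lane: acc + bits [103:52] of the 104-bit product.
static uint64_t vpmadd52h_ref(uint64_t Acc, uint64_t A, uint64_t B) {
  unsigned __int128 Prod =
      (unsigned __int128)(A & Mask52) * (unsigned __int128)(B & Mask52);
  return Acc + (uint64_t)(Prod >> 52);
}

int main() {
  // Small sanity checks: 3 * 4 = 12, whose low 52 bits are 12 and whose
  // high 52 bits are 0, so only the low-half form changes the accumulator.
  assert(vpmadd52l_ref(5, 3, 4) == 17);
  assert(vpmadd52h_ref(5, 3, 4) == 5);
  return 0;
}
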
diff --git a/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
new file mode 100644
index 0000000000000..0e322fec2c7d9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=AVXIFMA
+
+; High-52 path
+
+declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+
+; High-52, 25x25 masked inputs, accumulator = 1, expected constant fold.
+define <2 x i64> @kb52h_128_mask25_and1(<2 x i64> %x, <2 x i64> %y) {
+; AVX512VL-LABEL: kb52h_128_mask25_and1:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
+; AVX512VL-NEXT: # xmm0 = mem[0,0]
+; AVX512VL-NEXT: retq
+;
+; AVXIFMA-LABEL: kb52h_128_mask25_and1:
+; AVXIFMA: # %bb.0:
+; AVXIFMA-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
+; AVXIFMA-NEXT: # xmm0 = mem[0,0]
+; AVXIFMA-NEXT: retq
+ %mx = and <2 x i64> %x, splat (i64 33554431) ; (1<<25)-1
+ %my = and <2 x i64> %y, splat (i64 33554431) ; (1<<25)-1
+ %r = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(
+ <2 x i64> splat (i64 1),
+ <2 x i64> %mx,
+ <2 x i64> %my)
+ %ret = and <2 x i64> %r, splat (i64 1)
+ ret <2 x i64> %ret
+}
+
+; High-52, 25x26 masked inputs, accumulator = 1, expected constant fold.
+define <4 x i64> @kb52h_256_mask25x26_acc1(<4 x i64> %x, <4 x i64> %y) {
+; AVX512VL-LABEL: kb52h_256_mask25x26_acc1:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
+; AVX512VL-NEXT: retq
+;
+; AVXIFMA-LABEL: kb52h_256_mask25x26_acc1:
+; AVXIFMA: # %bb.0:
+; AVXIFMA-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
+; AVXIFMA-NEXT: retq
+ %mx = and <4 x i64> %x, splat (i64 33554431) ; (1<<25)-1
+ %my = and <4 x i64> %y, splat (i64 67108863) ; (1<<26)-1
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(
+ <4 x i64> splat (i64 1),
+ <4 x i64> %mx, <4 x i64> %my)
+ ret <4 x i64> %r
+}
+
+; Low-52 path
+
+declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+
+; Low-52, 26x26 masked inputs, add with accumulator.
+define <2 x i64> @kb52l_128_mask26x26_add_intrin(<2 x i64> %x, <2 x i64> %y, <2 x i64> %acc) {
+; AVX512VL-LABEL: kb52l_128_mask26x26_add_intrin:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVX512VL-NEXT: vmovdqa %xmm2, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVXIFMA-LABEL: kb52l_128_mask26x26_add_intrin:
+; AVXIFMA: # %bb.0:
+; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVXIFMA-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVXIFMA-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVXIFMA-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVXIFMA-NEXT: vmovdqa %xmm2, %xmm0
+; AVXIFMA-NEXT: retq
+ %xm = and <2 x i64> %x, splat (i64 67108863) ; (1<<26)-1
+ %ym = and <2 x i64> %y, splat (i64 67108863) ; (1<<26)-1
+ %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(
+ <2 x i64> %acc, <2 x i64> %xm, <2 x i64> %ym)
+ ret <2 x i64> %r
+}
+
+; Low-52, 50-bit × 2-bit masked inputs, add with accumulator.
+define <4 x i64> @kb52l_256_mask50x3_add_intrin(<4 x i64> %x, <4 x i64> %y, <4 x i64> %acc) {
+; AVX512VL-LABEL: kb52l_256_mask50x3_add_intrin:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512VL-NEXT: vpmadd52luq %ymm1, %ymm0, %ymm2
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVXIFMA-LABEL: kb52l_256_mask50x3_add_intrin:
+; AVXIFMA: # %bb.0:
+; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1125899906842623,1125899906842623,1125899906842623,1125899906842623]
+; AVXIFMA-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} ymm3 = [3,3,3,3]
+; AVXIFMA-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVXIFMA-NEXT: {vex} vpmadd52luq %ymm1, %ymm0, %ymm2
+; AVXIFMA-NEXT: vmovdqa %ymm2, %ymm0
+; AVXIFMA-NEXT: retq
+ %xm = and <4 x i64> %x, splat (i64 1125899906842623) ; (1<<50)-1
+ %ym = and <4 x i64> %y, splat (i64 3) ; (1<<2)-1
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(
+ <4 x i64> %acc, <4 x i64> %xm, <4 x i64> %ym)
+ ret <4 x i64> %r
+}
+
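
The expected codegen above follows from simple operand-width arithmetic. A small standalone check of those bounds (again not part of the commit, assuming unsigned __int128 is available):

#include <cassert>

int main() {
  const unsigned __int128 One = 1;
  // High-52 tests: a 25x25-bit or 25x26-bit product needs at most 51 bits,
  // so bits [103:52] of the 104-bit product are all zero and the VPMADD52H
  // result is exactly the accumulator (1) -- hence the constant folds.
  assert(((One << 25) - 1) * ((One << 25) - 1) >> 52 == 0);
  assert(((One << 25) - 1) * ((One << 26) - 1) >> 52 == 0);
  // Low-52 tests: 26x26-bit and 50x2-bit products still fit in 52 bits, so
  // the low-52 half keeps the whole product; with an unknown accumulator
  // nothing simplifies and the vpmadd52luq instruction is still emitted.
  assert(((One << 26) - 1) * ((One << 26) - 1) >> 52 == 0);
  assert(((One << 50) - 1) * 3 >> 52 == 0);
  return 0;
}
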
More information about the llvm-commits mailing list