[llvm] [TTI] Return a more sensible cost for histogram intrinsic. (PR #97397)
Graham Hunter via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 2 01:56:47 PDT 2024
https://github.com/huntergr-arm created https://github.com/llvm/llvm-project/pull/97397
This is just an initial cost, making it invalid for any target which
doesn't specifically return a cost for now. Also adds an AArch64
specific cost check.
We will need to improve that later, e.g. by returning a scalarization
cost for generic targets and possibly introducing a new TTI method, at
least once LoopVectorize has changed it's cost model. The reason is
that the histogram intrinsic also effectively contains a gather and
scatter, and we will need details of the addressing to determine an
appropriate cost for that.
>From 04e91c8fe84f81df7a9c39ef72ae9a32e10f50a7 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 27 Jun 2024 10:20:22 +0000
Subject: [PATCH] [TTI] Return a more sensible cost for histogram intrinsic.
This is just an initial cost, making it invalid for any target which
doesn't specifically return a cost for now. Also adds an AArch64
specific cost check.
We will need to improve that later, e.g. by returning a scalarization
cost for generic targets and possibly introducing a new TTI method, at
least once LoopVectorize has changed it's cost model. The reason is
that the histogram intrinsic also effectively contains a gather and
scatter, and we will need details of the addressing to determine an
appropriate cost for that.
---
.../llvm/Analysis/TargetTransformInfoImpl.h | 3 +
.../AArch64/AArch64TargetTransformInfo.cpp | 33 +++++
.../CostModel/AArch64/sve-intrinsics.ll | 118 ++++++++++++++++++
3 files changed, 154 insertions(+)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 0ded98f162abf..01624de190d51 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -728,6 +728,9 @@ class TargetTransformInfoImplBase {
switch (ICA.getID()) {
default:
break;
+ case Intrinsic::experimental_vector_histogram_add:
+ // For now, we want explicit support from the target for histograms.
+ return InstructionCost::getInvalid();
case Intrinsic::allow_runtime_check:
case Intrinsic::allow_ubsan_check:
case Intrinsic::annotation:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c0abbd32eeec4..1941cfcf80773 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -61,6 +61,11 @@ static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
cl::init(true), cl::Hidden);
+// A complete guess as to a reasonable cost.
+static cl::opt<unsigned>
+ BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
+ cl::desc("The cost of a histcnt instruction"));
+
namespace {
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
@@ -508,11 +513,39 @@ static bool isUnpackedVectorVT(EVT VecVT) {
VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock;
}
+static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
+ Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
+ Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
+
+ // Only allow (32b and 64b) integers or pointers for now...
+ if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
+ (EltTy->getScalarSizeInBits() != 32 &&
+ EltTy->getScalarSizeInBits() != 64))
+ return InstructionCost::getInvalid();
+
+ // FIXME: Hacky check for legal vector types. We can promote smaller types
+ // but we cannot legalize vectors via splitting for histcnt.
+ // FIXME: We should be able to generate histcnt for fixed-length vectors
+ // using ptrue with a specific VL.
+ if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
+ if ((VTy->getElementCount().getKnownMinValue() != 2 &&
+ VTy->getElementCount().getKnownMinValue() != 4) ||
+ VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
+ !VTy->isScalableTy())
+ return InstructionCost::getInvalid();
+
+ return InstructionCost(BaseHistCntCost);
+}
+
InstructionCost
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
auto *RetTy = ICA.getReturnType();
switch (ICA.getID()) {
+ case Intrinsic::experimental_vector_histogram_add:
+ if (!ST->hasSVE2())
+ return InstructionCost::getInvalid();
+ return getHistogramCost(ICA);
case Intrinsic::umin:
case Intrinsic::umax:
case Intrinsic::smin:
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 1ff280d75b4e9..1993023c91e26 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -909,6 +909,123 @@ define void @masked_scatter_v1i128(<1 x i128> %data, <1 x ptr> %ptrs, <1 x i1> %
ret void
}
+define void @histogram_nxv2i64(<vscale x 2 x ptr> %buckets, <vscale x 2 x i1> %mask) #3 {
+; CHECK-LABEL: 'histogram_nxv2i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv2i64'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @histogram_nxv4i32(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) #3 {
+; CHECK-LABEL: 'histogram_nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i32'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
+ ret void
+}
+
+define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: 'histogram_nxv8i16'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv8i16'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
+ ret void
+}
+
+define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) {
+; CHECK-LABEL: 'histogram_nxv16i8'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv16i8'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.nxv16p0.i64(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
+ ret void
+}
+
+define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) {
+; CHECK-LABEL: 'histogram_v2i64'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_v2i64'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+ ret void
+}
+
+define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) {
+; CHECK-LABEL: 'histogram_v4i32'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_v4i32'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+ ret void
+}
+
+define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) {
+; CHECK-LABEL: 'histogram_v8i16'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_v8i16'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+ ret void
+}
+
+define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
+; CHECK-LABEL: 'histogram_v16i8'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_v16i8'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.v16p0.i64(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+ ret void
+}
+
+define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: 'histogram_nxv4i64'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i64'
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
+ ret void
+}
+
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
@@ -949,3 +1066,4 @@ declare void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs,
attributes #0 = { "target-features"="+sve,+bf16" }
attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
attributes #2 = { "target-features"="+sve" vscale_range(2, 16) }
+attributes #3 = { "target-features"="+sve,+sve2" vscale_range(1,16) }
More information about the llvm-commits
mailing list