[llvm] [AArch64] Consider histcnt smaller than i32 in the cost model (PR #108521)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 13 02:29:41 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Sam Tebbs (SamTebbs33)
<details>
<summary>Changes</summary>
This PR updates the AArch64 cost model to account for the lower cost of histograms whose elements are narrower than i32, reflecting the improvements from
https://github.com/llvm/llvm-project/pull/101017 and https://github.com/llvm/llvm-project/pull/103037.
Work by Max Beck-Jones (@<!-- -->DevM-uk)
---
Full diff: https://github.com/llvm/llvm-project/pull/108521.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+17-11)
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+9-9)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 58c267f1ce4bd6..83b5344fc8ed24 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -517,25 +517,31 @@ static bool isUnpackedVectorVT(EVT VecVT) {
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
Type *EltTy = ICA.getArgTypes()[1]; // Type of bucket elements
+ unsigned TotalHistCnts = 1;
- // Only allow (32b and 64b) integers or pointers for now...
+ // Only allow (up to 64b) integers or pointers
if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
- (EltTy->getScalarSizeInBits() != 32 &&
- EltTy->getScalarSizeInBits() != 64))
+ EltTy->getScalarSizeInBits() > 64)
return InstructionCost::getInvalid();
- // FIXME: Hacky check for legal vector types. We can promote smaller types
- // but we cannot legalize vectors via splitting for histcnt.
// FIXME: We should be able to generate histcnt for fixed-length vectors
// using ptrue with a specific VL.
- if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy))
- if ((VTy->getElementCount().getKnownMinValue() != 2 &&
- VTy->getElementCount().getKnownMinValue() != 4) ||
- VTy->getPrimitiveSizeInBits().getKnownMinValue() > 128 ||
- !VTy->isScalableTy())
+ if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
+ unsigned EC = VTy->getElementCount().getKnownMinValue();
+ if (!isPowerOf2_64(EC) || !VTy->isScalableTy())
return InstructionCost::getInvalid();
- return InstructionCost(BaseHistCntCost);
+ bool Element64b = EltTy->isIntegerTy(64);
+
+ if (EC == 2 || (!Element64b && EC == 4))
+ return InstructionCost(BaseHistCntCost);
+
+ unsigned NaturalVectorWidth = Element64b ? AArch64::SVEBitsPerBlock / 64
+ : AArch64::SVEBitsPerBlock / 32;
+ TotalHistCnts = EC / NaturalVectorWidth;
+ }
+
+ return InstructionCost(BaseHistCntCost * TotalHistCnts);
}
InstructionCost
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index aede9c89843128..1ecd02e5c124a6 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -971,26 +971,26 @@ define void @histogram_nxv4i32(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m
ret void
}
-define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) {
+define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv8i16'
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv8i16'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
ret void
}
-define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) {
+define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv16i8'
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv16i8'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv16p0.i64(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
@@ -1049,13 +1049,13 @@ define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
ret void
}
-define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) {
+define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) #3 {
; CHECK-LABEL: 'histogram_nxv4i64'
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i64'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
``````````
</details>
https://github.com/llvm/llvm-project/pull/108521
More information about the llvm-commits mailing list