[llvm] [CostModel] Provide a default model for histogram intrinsics (PR #149348)

Graham Hunter via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 17 09:12:32 PDT 2025


https://github.com/huntergr-arm created https://github.com/llvm/llvm-project/pull/149348

Since we scalarize these intrinsics when the target does not support them, we should model that for costing purposes.


>From dd851043bc3e251ba5f8bbccaefba9fa99999ecd Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 17 Jul 2025 13:33:17 +0000
Subject: [PATCH 1/3] Test precommit

---
 .../Analysis/CostModel/AArch64/histograms.ll  | 142 ++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/histograms.ll

diff --git a/llvm/test/Analysis/CostModel/AArch64/histograms.ll b/llvm/test/Analysis/CostModel/AArch64/histograms.ll
new file mode 100644
index 0000000000000..1c21a850b3634
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/histograms.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-SVE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2 | FileCheck %s --check-prefix=CHECK-SVE2
+
+define void @histograms() {
+; CHECK-SVE-LABEL: 'histograms'
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE2-LABEL: 'histograms'
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+  ret void
+}
+
+declare void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr>, i64, <vscale x 2 x i1>)
+declare void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr>, i16, <vscale x 8 x i1>)
+declare void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr>, i8, <vscale x 16 x i1>)
+declare void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr>, i64, <2 x i1>)
+declare void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr>, i32, <4 x i1>)
+declare void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr>, i16, <8 x i1>)
+declare void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr>, i8, <16 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr>, i64, <vscale x 2 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr>, i16, <vscale x 8 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr>, i8, <vscale x 16 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr>, i64, <2 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr>, i32, <4 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr>, i16, <8 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr>, i8, <16 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr>, i64, <vscale x 2 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr>, i16, <vscale x 8 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr>, i8, <vscale x 16 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr>, i64, <2 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr>, i32, <4 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr>, i16, <8 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr>, i8, <16 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr>, i64, <vscale x 2 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr>, i16, <vscale x 8 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr>, i8, <vscale x 16 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr>, i64, <2 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr>, i32, <4 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr>, i16, <8 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr>, i8, <16 x i1>)

>From 7681d5b7b39f72d9d240e86950db4024b2d8f58a Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 17 Jul 2025 14:15:13 +0000
Subject: [PATCH 2/3] Only return a cost estimate for scalable types in AArch64
 TTI

---
 .../AArch64/AArch64TargetTransformInfo.cpp    | 27 ++++++++++++++-----
 .../Analysis/CostModel/AArch64/histograms.ll  | 16 +++++------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 90d3d92d6bbf5..5b29da919c6da 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -547,7 +547,17 @@ static bool isUnpackedVectorVT(EVT VecVT) {
          VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock;
 }
 
-static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
+static InstructionCost getHistogramCost(const AArch64Subtarget *ST,
+                                        const IntrinsicCostAttributes &ICA) {
+  // We need to know at least the number of elements in the vector of buckets
+  // and the size of each element to update.
+  if (ICA.getArgTypes().size() < 2)
+    return InstructionCost::getInvalid();
+
+  // Only interested in costing for the hardware instruction from SVE2.
+  if (!ST->hasSVE2())
+    return InstructionCost::getInvalid();
+
   Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
   Type *EltTy = ICA.getArgTypes()[1];        // Type of bucket elements
   unsigned TotalHistCnts = 1;
@@ -572,9 +582,11 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
 
     unsigned NaturalVectorWidth = AArch64::SVEBitsPerBlock / LegalEltSize;
     TotalHistCnts = EC / NaturalVectorWidth;
+
+    return InstructionCost(BaseHistCntCost * TotalHistCnts);
   }
 
-  return InstructionCost(BaseHistCntCost * TotalHistCnts);
+  return InstructionCost::getInvalid();
 }
 
 InstructionCost
@@ -590,10 +602,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
       return InstructionCost::getInvalid();
 
   switch (ICA.getID()) {
-  case Intrinsic::experimental_vector_histogram_add:
-    if (!ST->hasSVE2())
-      return InstructionCost::getInvalid();
-    return getHistogramCost(ICA);
+  case Intrinsic::experimental_vector_histogram_add: {
+    InstructionCost HistCost = getHistogramCost(ST, ICA);
+    // If the cost isn't valid, we may still be able to scalarize
+    if (HistCost.isValid())
+      return HistCost;
+    break;
+  }
   case Intrinsic::umin:
   case Intrinsic::umax:
   case Intrinsic::smin:
diff --git a/llvm/test/Analysis/CostModel/AArch64/histograms.ll b/llvm/test/Analysis/CostModel/AArch64/histograms.ll
index 1c21a850b3634..455eed9977372 100644
--- a/llvm/test/Analysis/CostModel/AArch64/histograms.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/histograms.ll
@@ -8,10 +8,10 @@ define void @histograms() {
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
@@ -43,10 +43,10 @@ define void @histograms() {
 ; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)

>From fddc3b38fa96aa7230672b37c8357ce0bd85bf46 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 17 Jul 2025 15:30:35 +0000
Subject: [PATCH 3/3] Add BasicTTIImplBase implementation for histogram ops

---
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  3 -
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 46 ++++++++++
 .../Analysis/CostModel/AArch64/histograms.ll  | 84 +++++++++++++------
 .../CostModel/AArch64/sve-intrinsics.ll       | 24 +++---
 .../Transforms/LoopVectorize/histograms.ll    | 37 ++++++--
 5 files changed, 148 insertions(+), 46 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index ddc8a5eaffa94..a56afe05f66f6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -878,9 +878,6 @@ class TargetTransformInfoImplBase {
     switch (ICA.getID()) {
     default:
       break;
-    case Intrinsic::experimental_vector_histogram_add:
-      // For now, we want explicit support from the target for histograms.
-      return InstructionCost::getInvalid();
     case Intrinsic::allow_runtime_check:
     case Intrinsic::allow_ubsan_check:
     case Intrinsic::annotation:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 1d7c41452f7d5..5d5705f82e2f7 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2110,6 +2110,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     }
     case Intrinsic::get_active_lane_mask:
     case Intrinsic::experimental_vector_match:
+    case Intrinsic::experimental_vector_histogram_add:
       return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
     case Intrinsic::modf:
     case Intrinsic::sincos:
@@ -2458,6 +2459,51 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
           thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
       return Cost;
     }
+    case Intrinsic::experimental_vector_histogram_add:
+    case Intrinsic::experimental_vector_histogram_uadd_sat:
+    case Intrinsic::experimental_vector_histogram_umax:
+    case Intrinsic::experimental_vector_histogram_umin: {
+      FixedVectorType *PtrsTy = dyn_cast<FixedVectorType>(ICA.getArgTypes()[0]);
+      Type *EltTy = ICA.getArgTypes()[1];
+
+      // Targets with scalable vectors must handle this on their own.
+      if (!PtrsTy)
+        return InstructionCost::getInvalid();
+
+      Align Alignment = thisT()->DL.getABITypeAlign(EltTy);
+      InstructionCost Cost = 0;
+      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, PtrsTy,
+                                          CostKind, 1, nullptr, nullptr);
+      Cost += thisT()->getMemoryOpCost(Instruction::Load, EltTy, Alignment, 0,
+                                       CostKind);
+      switch (IID) {
+      default:
+        llvm_unreachable("Unhandled histogram update operation.");
+      case Intrinsic::experimental_vector_histogram_add:
+        Cost +=
+            thisT()->getArithmeticInstrCost(Instruction::Add, EltTy, CostKind);
+        break;
+      case Intrinsic::experimental_vector_histogram_uadd_sat: {
+        IntrinsicCostAttributes UAddSat(Intrinsic::uadd_sat, EltTy, {EltTy});
+        Cost += thisT()->getIntrinsicInstrCost(UAddSat, CostKind);
+        break;
+      }
+      case Intrinsic::experimental_vector_histogram_umax: {
+        IntrinsicCostAttributes UMax(Intrinsic::umax, EltTy, {EltTy});
+        Cost += thisT()->getIntrinsicInstrCost(UMax, CostKind);
+        break;
+      }
+      case Intrinsic::experimental_vector_histogram_umin: {
+        IntrinsicCostAttributes UMin(Intrinsic::umin, EltTy, {EltTy});
+        Cost += thisT()->getIntrinsicInstrCost(UMin, CostKind);
+        break;
+      }
+      }
+      Cost += thisT()->getMemoryOpCost(Instruction::Store, EltTy, Alignment, 0,
+                                       CostKind);
+      Cost *= PtrsTy->getNumElements();
+      return Cost;
+    }
     case Intrinsic::get_active_lane_mask: {
       Type *ArgTy = ICA.getArgTypes()[0];
       EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
diff --git a/llvm/test/Analysis/CostModel/AArch64/histograms.ll b/llvm/test/Analysis/CostModel/AArch64/histograms.ll
index 455eed9977372..c0489587551b0 100644
--- a/llvm/test/Analysis/CostModel/AArch64/histograms.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/histograms.ll
@@ -1,8 +1,44 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECK-NEON
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-SVE
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2 | FileCheck %s --check-prefix=CHECK-SVE2
 
 define void @histograms() {
+; CHECK-NEON-LABEL: 'histograms'
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
 ; CHECK-SVE-LABEL: 'histograms'
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
@@ -16,26 +52,26 @@ define void @histograms() {
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SVE2-LABEL: 'histograms'
@@ -51,26 +87,26 @@ define void @histograms() {
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
-; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
 ; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index ee485e2f861b4..abd6e50f771b3 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1277,15 +1277,15 @@ define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1>
 
 define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) {
 ; CHECK-VSCALE-1-LABEL: 'histogram_v2i64'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'histogram_v2i64'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'histogram_v2i64'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
@@ -1294,15 +1294,15 @@ define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) {
 
 define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) {
 ; CHECK-VSCALE-1-LABEL: 'histogram_v4i32'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'histogram_v4i32'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'histogram_v4i32'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
@@ -1311,15 +1311,15 @@ define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) {
 
 define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) {
 ; CHECK-VSCALE-1-LABEL: 'histogram_v8i16'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'histogram_v8i16'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'histogram_v8i16'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
@@ -1328,15 +1328,15 @@ define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) {
 
 define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
 ; CHECK-VSCALE-1-LABEL: 'histogram_v16i8'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'histogram_v16i8'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'histogram_v16i8'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.v16p0.i64(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
diff --git a/llvm/test/Transforms/LoopVectorize/histograms.ll b/llvm/test/Transforms/LoopVectorize/histograms.ll
index 1adc0bf0c9ec0..f0ceae7d5816b 100644
--- a/llvm/test/Transforms/LoopVectorize/histograms.ll
+++ b/llvm/test/Transforms/LoopVectorize/histograms.ll
@@ -1,25 +1,48 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt < %s -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -force-vector-width=2 -S | FileCheck %s
 
-;; Currently we don't expect this to vectorize, since the generic cost model returns
-;; invalid for the histogram intrinsic.
 define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) {
 ; CHECK-LABEL: define void @simple_histogram(
 ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -2
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[TMP5]], i64 1
+; CHECK-NEXT:    call void @llvm.experimental.vector.histogram.add.v2p0.i32(<2 x ptr> [[TMP7]], i32 1, <2 x i1> splat (i1 true))
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY1:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT:    [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       for.exit:
 ; CHECK-NEXT:    ret void
 ;



More information about the llvm-commits mailing list