[llvm] [RISCV][TTI] Implement cost of llvm.experimental.vector.extract.last.active (PR #184067)
Elvis Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 22:27:34 PDT 2026
https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/184067
>From f8bf4f74677d15c25bbf6b7d185d8375db521b48 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 1 Mar 2026 21:53:38 -0800
Subject: [PATCH 1/6] precommit tests.
---
.../CostModel/RISCV/extract-last-active.ll | 109 ++++++++++++++++++
1 file changed, 109 insertions(+)
create mode 100644 llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
diff --git a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
new file mode 100644
index 0000000000000..7591f600f81a8
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64--linux-gnu -mattr=+v | FileCheck %s
+
+define void @extractions() {
+ ;; Legal types
+; CHECK-LABEL: 'extractions'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+ %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+ %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+ %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+ %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+ %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+ %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+ %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+ %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+ %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+ %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+ %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+ %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+ %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+ %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+ %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+
+ %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+ %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+ %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+ %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+ %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+ %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+ %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+ %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+ %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+ %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+ %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+ %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+ %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+ %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+ %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+ %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+
+ %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+ %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+ %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+ %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+ %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+ %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+ %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+ %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+ %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+ %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+ %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+ %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+ %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+ %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+ %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+ %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+
+ ret void
+}
>From 10e2b01dbdf30bf54663582f31d806bcdcd6e48d Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 1 Mar 2026 21:55:13 -0800
Subject: [PATCH 2/6] [RISCV][TTI] Implement cost of
llvm.experimental.vector.extract.last.active
This patch implements the cost of
llvm.experimental.vector.extract.last.active which will lower to:
vid v10, v0.t
vredmaxu.vs v10, v10, v10
vmv.v.s a0, v10
zext.b a0, a0
vslidedown v8, v8,
This patch also helps conditional-scalar-assignment (CSA) works for
scalable vector.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 23 +++++
.../CostModel/RISCV/extract-last-active.ll | 84 +++++++++----------
2 files changed, 65 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ffb64d9f52beb..df256c4f8850e 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1702,6 +1702,29 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
CmpInst::FCMP_UNO, CostKind);
return Cost;
}
+ case Intrinsic::experimental_vector_extract_last_active: {
+ Type *ValTy = ICA.getArgTypes()[0];
+ Type *MaskTy = ICA.getArgTypes()[1];
+
+ auto ValLT = getTypeLegalizationCost(ValTy);
+ auto MaskLT = getTypeLegalizationCost(MaskTy);
+
+ if (!ValLT.first.isValid() || !MaskLT.first.isValid())
+ return InstructionCost::getInvalid();
+
+ // TODO: Return free when the entire lane is inactive.
+ // The expected asm sequence is:
+ // vid v10, v0.t
+ // vredmaxu.vs v10, v10, v10
+ // vmv.x.s a0, v10
+ // zext.b a0, a0
+ // vslidedown v8, v8, a0
+ // vmv.x.s a0, v8
+ unsigned Opcodes[] = {RISCV::VID_V, RISCV::VREDMAXU_VS, RISCV::VMV_X_S,
+ RISCV::VSLIDEDOWN_VI, RISCV::VMV_X_S};
+ return ValLT.first *
+ getRISCVInstructionCost(Opcodes, ValLT.second, CostKind);
+ }
}
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
index 7591f600f81a8..9d143d62b9c0d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
@@ -4,54 +4,54 @@
define void @extractions() {
;; Legal types
; CHECK-LABEL: 'extractions'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
>From 09feabfe2b2f183c77901ea27fcc20c696abdea3 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 4 Mar 2026 15:57:39 -0800
Subject: [PATCH 3/6] !fixup, add cost of vcpop.m.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 12 +++-
.../CostModel/RISCV/extract-last-active.ll | 72 +++++++++----------
2 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index df256c4f8850e..eac45456931d4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1712,18 +1712,24 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (!ValLT.first.isValid() || !MaskLT.first.isValid())
return InstructionCost::getInvalid();
- // TODO: Return free when the entire lane is inactive.
+ // TODO: Return cheaper cost when the entire lane is inactive.
// The expected asm sequence is:
+ // vcpop.m a0, v0
+ // beqz a0, exit # Return passthru when the entire lane is inactive.
// vid v10, v0.t
// vredmaxu.vs v10, v10, v10
// vmv.x.s a0, v10
// zext.b a0, a0
// vslidedown v8, v8, a0
// vmv.x.s a0, v8
+ // exit:
+ // ...
unsigned Opcodes[] = {RISCV::VID_V, RISCV::VREDMAXU_VS, RISCV::VMV_X_S,
RISCV::VSLIDEDOWN_VI, RISCV::VMV_X_S};
- return ValLT.first *
- getRISCVInstructionCost(Opcodes, ValLT.second, CostKind);
+ return MaskLT.first * getRISCVInstructionCost(RISCV::VCPOP_M, MaskLT.second,
+ CostKind) +
+ ValLT.first *
+ getRISCVInstructionCost(Opcodes, ValLT.second, CostKind);
}
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
index 9d143d62b9c0d..c521977e39035 100644
--- a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
@@ -4,54 +4,54 @@
define void @extractions() {
;; Legal types
; CHECK-LABEL: 'extractions'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
>From ee35c2fe9237406e984ad00e7e5e89364bd46563 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 4 Mar 2026 21:06:58 -0800
Subject: [PATCH 4/6] Address comments.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 5 +---
.../CostModel/RISCV/extract-last-active.ll | 27 +++++++++----------
2 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index eac45456931d4..58c27b749d5ad 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1709,9 +1709,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto ValLT = getTypeLegalizationCost(ValTy);
auto MaskLT = getTypeLegalizationCost(MaskTy);
- if (!ValLT.first.isValid() || !MaskLT.first.isValid())
- return InstructionCost::getInvalid();
-
// TODO: Return cheaper cost when the entire lane is inactive.
// The expected asm sequence is:
// vcpop.m a0, v0
@@ -1720,7 +1717,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// vredmaxu.vs v10, v10, v10
// vmv.x.s a0, v10
// zext.b a0, a0
- // vslidedown v8, v8, a0
+ // vslidedown.vx v8, v8, a0
// vmv.x.s a0, v8
// exit:
// ...
diff --git a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
index c521977e39035..7105f6a61a325 100644
--- a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
@@ -1,55 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64--linux-gnu -mattr=+v | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
define void @extractions() {
- ;; Legal types
; CHECK-LABEL: 'extractions'
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv8f16 = call half @llvm.experimental.vector.extract.last.active.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f16 = call half @llvm.experimental.vector.extract.last.active.v16f16(<16 x half> poison, <16 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
>From dfb7e99ea30d636c5cc3356dee717f86fc28a6c2 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Thu, 5 Mar 2026 15:46:35 -0800
Subject: [PATCH 5/6] !fixup, also consider branch and zext costs.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 25 +++++--
.../CostModel/RISCV/extract-last-active.ll | 68 +++++++++----------
2 files changed, 53 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 58c27b749d5ad..22ac00fbd926f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1721,12 +1721,25 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// vmv.x.s a0, v8
// exit:
// ...
- unsigned Opcodes[] = {RISCV::VID_V, RISCV::VREDMAXU_VS, RISCV::VMV_X_S,
- RISCV::VSLIDEDOWN_VI, RISCV::VMV_X_S};
- return MaskLT.first * getRISCVInstructionCost(RISCV::VCPOP_M, MaskLT.second,
- CostKind) +
- ValLT.first *
- getRISCVInstructionCost(Opcodes, ValLT.second, CostKind);
+ auto *Int8Ty = Type::getInt8Ty(ValTy->getContext());
+ auto *Int8VecTy =
+ VectorType::get(Int8Ty, cast<VectorType>(ValTy)->getElementCount());
+ auto Int8LT = getTypeLegalizationCost(Int8VecTy);
+ InstructionCost Cost = 0;
+ unsigned Opcodes[] = {RISCV::VID_V, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
+
+ Cost += MaskLT.first *
+ getRISCVInstructionCost(RISCV::VCPOP_M, MaskLT.second, CostKind);
+ Cost += getCFInstrCost(Instruction::Br, CostKind, nullptr);
+ Cost += Int8LT.first *
+ getRISCVInstructionCost(Opcodes, Int8LT.second, CostKind);
+ Cost += getCastInstrCost(Instruction::ZExt,
+ Type::getInt64Ty(ValTy->getContext()), Int8Ty,
+ TTI::CastContextHint::None, CostKind, nullptr);
+ Cost += ValLT.first *
+ getRISCVInstructionCost({RISCV::VSLIDEDOWN_VI, RISCV::VMV_X_S},
+ ValLT.second, CostKind);
+ return Cost;
}
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
index 7105f6a61a325..d6b00a8bf8a79 100644
--- a/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/extract-last-active.ll
@@ -3,15 +3,15 @@
define void @extractions() {
; CHECK-LABEL: 'extractions'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i16 = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> poison, <8 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> poison, <4 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64 = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> poison, <2 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f16 = call half @llvm.experimental.vector.extract.last.active.v8f16(<8 x half> poison, <8 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v8bf16(<8 x bfloat> poison, <8 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> poison, <4 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2f64 = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> poison, <2 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv16i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> poison, i8 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv8i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> poison, i16 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv4i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv2i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> poison, i64 poison)
@@ -19,7 +19,7 @@ define void @extractions() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv8bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, bfloat poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv4f32 = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, float poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv2f64 = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32i8 = call i8 @llvm.experimental.vector.extract.last.active.v32i8(<32 x i8> poison, <32 x i1> poison, i8 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i16 = call i16 @llvm.experimental.vector.extract.last.active.v16i16(<16 x i16> poison, <16 x i1> poison, i16 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8i32 = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> poison, <8 x i1> poison, i32 poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4i64 = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> poison, <4 x i1> poison, i64 poison)
@@ -27,30 +27,30 @@ define void @extractions() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v16bf16(<16 x bfloat> poison, <16 x i1> poison, bfloat poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f32 = call float @llvm.experimental.vector.extract.last.active.v8f32(<8 x float> poison, <8 x i1> poison, float poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f64 = call double @llvm.experimental.vector.extract.last.active.v4f64(<4 x double> poison, <4 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv32i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv32i8(<vscale x 32 x i8> poison, <vscale x 32 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv16i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv16i16(<vscale x 16 x i16> poison, <vscale x 16 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv8i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv4i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv4i64(<vscale x 4 x i64> poison, <vscale x 4 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv16f16 = call half @llvm.experimental.vector.extract.last.active.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv16bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv8f32 = call float @llvm.experimental.vector.extract.last.active.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv4f64 = call double @llvm.experimental.vector.extract.last.active.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8i8 = call i8 @llvm.experimental.vector.extract.last.active.v8i8(<8 x i8> poison, <8 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> poison, <4 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i32 = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> poison, <2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v1i64 = call i64 @llvm.experimental.vector.extract.last.active.v1i64(<1 x i64> poison, <1 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call half @llvm.experimental.vector.extract.last.active.v4f16(<4 x half> poison, <4 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.v4bf16(<4 x bfloat> poison, <4 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2f32 = call float @llvm.experimental.vector.extract.last.active.v2f32(<2 x float> poison, <2 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v1f64 = call double @llvm.experimental.vector.extract.last.active.v1f64(<1 x double> poison, <1 x i1> poison, double poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv8i8 = call i8 @llvm.experimental.vector.extract.last.active.nxv8i8(<vscale x 8 x i8> poison, <vscale x 8 x i1> poison, i8 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv4i16 = call i16 @llvm.experimental.vector.extract.last.active.nxv4i16(<vscale x 4 x i16> poison, <vscale x 4 x i1> poison, i16 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv2i32 = call i32 @llvm.experimental.vector.extract.last.active.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv1i64 = call i64 @llvm.experimental.vector.extract.last.active.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i1> poison, i64 poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv4f16 = call half @llvm.experimental.vector.extract.last.active.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, half poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv4bf16 = call bfloat @llvm.experimental.vector.extract.last.active.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv2f32 = call float @llvm.experimental.vector.extract.last.active.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, float poison)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv1f64 = call double @llvm.experimental.vector.extract.last.active.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, double poison)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v16i8 = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> poison, <16 x i1> poison, i8 poison)
>From 1efe2f9a509227761ef090de48011dbb2be047e7 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 8 Mar 2026 20:15:59 -0700
Subject: [PATCH 6/6] !fixup, use getBitWidthForCttzElements to get step vector
size.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 24 ++++++++++++-------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 22ac00fbd926f..5394d5dc0be11 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1703,8 +1703,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return Cost;
}
case Intrinsic::experimental_vector_extract_last_active: {
- Type *ValTy = ICA.getArgTypes()[0];
- Type *MaskTy = ICA.getArgTypes()[1];
+ auto *ValTy = cast<VectorType>(ICA.getArgTypes()[0]);
+ auto *MaskTy = cast<VectorType>(ICA.getArgTypes()[1]);
auto ValLT = getTypeLegalizationCost(ValTy);
auto MaskLT = getTypeLegalizationCost(MaskTy);
@@ -1721,20 +1721,26 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// vmv.x.s a0, v8
// exit:
// ...
- auto *Int8Ty = Type::getInt8Ty(ValTy->getContext());
- auto *Int8VecTy =
- VectorType::get(Int8Ty, cast<VectorType>(ValTy)->getElementCount());
- auto Int8LT = getTypeLegalizationCost(Int8VecTy);
+
+ // Find a suitable type for a stepvector.
+ ConstantRange VScaleRange(APInt(64, 1), APInt::getZero(64));
+ unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
+ MaskTy->getScalarType(), MaskTy->getElementCount(),
+ /*ZeroIsPoison=*/true, &VScaleRange);
+ EltWidth = std::max(EltWidth, MaskTy->getScalarSizeInBits());
+ Type *StepTy = Type::getIntNTy(MaskTy->getContext(), EltWidth);
+ auto *StepVecTy = VectorType::get(StepTy, ValTy->getElementCount());
+ auto StepLT = getTypeLegalizationCost(StepVecTy);
InstructionCost Cost = 0;
unsigned Opcodes[] = {RISCV::VID_V, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
Cost += MaskLT.first *
getRISCVInstructionCost(RISCV::VCPOP_M, MaskLT.second, CostKind);
Cost += getCFInstrCost(Instruction::Br, CostKind, nullptr);
- Cost += Int8LT.first *
- getRISCVInstructionCost(Opcodes, Int8LT.second, CostKind);
+ Cost += StepLT.first *
+ getRISCVInstructionCost(Opcodes, StepLT.second, CostKind);
Cost += getCastInstrCost(Instruction::ZExt,
- Type::getInt64Ty(ValTy->getContext()), Int8Ty,
+ Type::getInt64Ty(ValTy->getContext()), StepTy,
TTI::CastContextHint::None, CostKind, nullptr);
Cost += ValLT.first *
getRISCVInstructionCost({RISCV::VSLIDEDOWN_VI, RISCV::VMV_X_S},
More information about the llvm-commits
mailing list