[llvm] [RISCV][TTI] Model the cost of insert/extractelt when the vector split into multiple register group and idx exceed single group. (PR #118401)

Elvis Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 23:13:26 PST 2024


https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/118401

>From 806bbabe7fa84dbd82092923fc53888cc597256a Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 2 Dec 2024 13:41:10 -0800
Subject: [PATCH 1/4] [RISCV][TTI] Model the cost of extractelt when it cannot
 use vmv + vslide.

This patch implements the cost for the case where the vector needs to be
split into multiple register groups and the index exceeds a single vector
group. In this situation, we need to store the vector to the stack and load
the target element.

After this patch, the cost of extractelement will be closer to the
generated assembly.
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 17 ++++
 .../CostModel/RISCV/rvv-extractelement.ll     | 88 +++++++++----------
 2 files changed, 61 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 57f635ca6f42a8..20ca80aedab62c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1945,6 +1945,23 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     // TODO: should we count these special vsetvlis?
     BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
   }
+
+  // When the vector needs to be split into multiple register groups and the
+  // index exceeds a single vector register group, we need to extract the
+  // element via the stack.
+  if (Opcode == Instruction::ExtractElement && LT.first > 1 &&
+      ((Index == -1U) || (Index > LT.second.getVectorMinNumElements() &&
+                          LT.second.isScalableVector()))) {
+    Type *ScalarType = Val->getScalarType();
+    Align VecAlign = DL.getPrefTypeAlign(Val);
+    Align SclAlign = DL.getPrefTypeAlign(ScalarType);
+    // Store all split vectors into stack and load the target element.
+    return LT.first *
+               getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, CostKind) +
+           getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
+                           CostKind);
+  }
+
   return BaseCost + SlideCost;
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll
index 618b7bc8945a50..34a323066689ba 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-extractelement.ll
@@ -139,7 +139,7 @@ define void @extractelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_x = extractelement <vscale x 16 x i8> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8_x = extractelement <vscale x 32 x i8> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i8_x = extractelement <vscale x 64 x i8> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv128i8_x = extractelement <vscale x 128 x i8> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv128i8_x = extractelement <vscale x 128 x i8> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_x = extractelement <2 x i16> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_x = extractelement <4 x i16> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_x = extractelement <8 x i16> undef, i32 %x
@@ -151,7 +151,7 @@ define void @extractelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_x = extractelement <vscale x 8 x i16> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_x = extractelement <vscale x 16 x i16> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i16_x = extractelement <vscale x 32 x i16> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_x = extractelement <vscale x 64 x i16> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_x = extractelement <vscale x 64 x i16> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_x = extractelement <2 x i32> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_x = extractelement <4 x i32> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_x = extractelement <8 x i32> undef, i32 %x
@@ -161,7 +161,7 @@ define void @extractelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_x = extractelement <vscale x 4 x i32> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_x = extractelement <vscale x 8 x i32> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_x = extractelement <vscale x 16 x i32> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_x = extractelement <vscale x 32 x i32> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_x = extractelement <vscale x 32 x i32> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i64_x = extractelement <2 x i64> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_x = extractelement <4 x i64> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_x = extractelement <8 x i64> undef, i32 %x
@@ -169,7 +169,7 @@ define void @extractelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv2i64_x = extractelement <vscale x 2 x i64> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv4i64_x = extractelement <vscale x 4 x i64> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv8i64_x = extractelement <vscale x 8 x i64> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_x = extractelement <vscale x 16 x i64> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv16i64_x = extractelement <vscale x 16 x i64> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64V-LABEL: 'extractelement_int'
@@ -304,7 +304,7 @@ define void @extractelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_x = extractelement <vscale x 16 x i8> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8_x = extractelement <vscale x 32 x i8> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i8_x = extractelement <vscale x 64 x i8> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv128i8_x = extractelement <vscale x 128 x i8> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv128i8_x = extractelement <vscale x 128 x i8> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_x = extractelement <2 x i16> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_x = extractelement <4 x i16> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_x = extractelement <8 x i16> undef, i32 %x
@@ -316,7 +316,7 @@ define void @extractelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_x = extractelement <vscale x 8 x i16> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_x = extractelement <vscale x 16 x i16> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i16_x = extractelement <vscale x 32 x i16> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_x = extractelement <vscale x 64 x i16> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_x = extractelement <vscale x 64 x i16> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_x = extractelement <2 x i32> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_x = extractelement <4 x i32> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_x = extractelement <8 x i32> undef, i32 %x
@@ -326,7 +326,7 @@ define void @extractelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_x = extractelement <vscale x 4 x i32> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_x = extractelement <vscale x 8 x i32> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_x = extractelement <vscale x 16 x i32> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_x = extractelement <vscale x 32 x i32> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_x = extractelement <vscale x 32 x i32> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_x = extractelement <2 x i64> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_x = extractelement <4 x i64> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_x = extractelement <8 x i64> undef, i32 %x
@@ -334,7 +334,7 @@ define void @extractelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_x = extractelement <vscale x 2 x i64> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_x = extractelement <vscale x 4 x i64> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_x = extractelement <vscale x 8 x i64> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i64_x = extractelement <vscale x 16 x i64> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i64_x = extractelement <vscale x 16 x i64> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV32ZVE64X-LABEL: 'extractelement_int'
@@ -462,44 +462,44 @@ define void @extractelement_int(i32 %x) {
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_x = extractelement <16 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_x = extractelement <32 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_x = extractelement <64 x i8> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i8_x = extractelement <128 x i8> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i8_x = extractelement <128 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_x = extractelement <vscale x 2 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_x = extractelement <vscale x 4 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_x = extractelement <vscale x 8 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_x = extractelement <vscale x 16 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8_x = extractelement <vscale x 32 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i8_x = extractelement <vscale x 64 x i8> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv128i8_x = extractelement <vscale x 128 x i8> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv128i8_x = extractelement <vscale x 128 x i8> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_x = extractelement <2 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_x = extractelement <4 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_x = extractelement <8 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_x = extractelement <16 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i16_x = extractelement <32 x i16> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i16_x = extractelement <64 x i16> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i16_x = extractelement <64 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_x = extractelement <vscale x 2 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_x = extractelement <vscale x 4 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_x = extractelement <vscale x 8 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_x = extractelement <vscale x 16 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i16_x = extractelement <vscale x 32 x i16> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_x = extractelement <vscale x 64 x i16> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_x = extractelement <vscale x 64 x i16> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_x = extractelement <2 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_x = extractelement <4 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_x = extractelement <8 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_x = extractelement <16 x i32> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_x = extractelement <32 x i32> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i32_x = extractelement <32 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_x = extractelement <vscale x 2 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_x = extractelement <vscale x 4 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_x = extractelement <vscale x 8 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_x = extractelement <vscale x 16 x i32> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_x = extractelement <vscale x 32 x i32> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_x = extractelement <vscale x 32 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i64_x = extractelement <2 x i64> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_x = extractelement <4 x i64> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_x = extractelement <8 x i64> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i64_x = extractelement <16 x i64> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v16i64_x = extractelement <16 x i64> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv2i64_x = extractelement <vscale x 2 x i64> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv4i64_x = extractelement <vscale x 4 x i64> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv8i64_x = extractelement <vscale x 8 x i64> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_x = extractelement <vscale x 16 x i64> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv16i64_x = extractelement <vscale x 16 x i64> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64ZVE64X-LABEL: 'extractelement_int'
@@ -627,44 +627,44 @@ define void @extractelement_int(i32 %x) {
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_x = extractelement <16 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_x = extractelement <32 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_x = extractelement <64 x i8> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i8_x = extractelement <128 x i8> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i8_x = extractelement <128 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_x = extractelement <vscale x 2 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_x = extractelement <vscale x 4 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_x = extractelement <vscale x 8 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_x = extractelement <vscale x 16 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8_x = extractelement <vscale x 32 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i8_x = extractelement <vscale x 64 x i8> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv128i8_x = extractelement <vscale x 128 x i8> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv128i8_x = extractelement <vscale x 128 x i8> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_x = extractelement <2 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_x = extractelement <4 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_x = extractelement <8 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_x = extractelement <16 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i16_x = extractelement <32 x i16> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i16_x = extractelement <64 x i16> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i16_x = extractelement <64 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_x = extractelement <vscale x 2 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_x = extractelement <vscale x 4 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_x = extractelement <vscale x 8 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_x = extractelement <vscale x 16 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i16_x = extractelement <vscale x 32 x i16> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_x = extractelement <vscale x 64 x i16> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_x = extractelement <vscale x 64 x i16> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_x = extractelement <2 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_x = extractelement <4 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_x = extractelement <8 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_x = extractelement <16 x i32> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_x = extractelement <32 x i32> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i32_x = extractelement <32 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_x = extractelement <vscale x 2 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_x = extractelement <vscale x 4 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_x = extractelement <vscale x 8 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_x = extractelement <vscale x 16 x i32> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_x = extractelement <vscale x 32 x i32> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_x = extractelement <vscale x 32 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_x = extractelement <2 x i64> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_x = extractelement <4 x i64> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_x = extractelement <8 x i64> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i64_x = extractelement <16 x i64> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v16i64_x = extractelement <16 x i64> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_x = extractelement <vscale x 2 x i64> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_x = extractelement <vscale x 4 x i64> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_x = extractelement <vscale x 8 x i64> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i64_x = extractelement <vscale x 16 x i64> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i64_x = extractelement <vscale x 16 x i64> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i1_0 = extractelement <2 x i1> undef, i32 0
@@ -876,9 +876,9 @@ define void @extractelement_int_lmul(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_31 = extractelement <32 x i32> undef, i32 31
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32_63 = extractelement <64 x i32> undef, i32 63
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i8 = extractelement <128 x i8> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v256i8 = extractelement <256 x i8> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v256i8 = extractelement <256 x i8> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32 = extractelement <32 x i32> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32 = extractelement <64 x i32> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i32 = extractelement <64 x i32> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64V-LABEL: 'extractelement_int_lmul'
@@ -893,9 +893,9 @@ define void @extractelement_int_lmul(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_31 = extractelement <32 x i32> undef, i32 31
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32_63 = extractelement <64 x i32> undef, i32 63
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i8 = extractelement <128 x i8> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v256i8 = extractelement <256 x i8> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v256i8 = extractelement <256 x i8> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32 = extractelement <32 x i32> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32 = extractelement <64 x i32> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i32 = extractelement <64 x i32> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV32ZVE64X-LABEL: 'extractelement_int_lmul'
@@ -909,10 +909,10 @@ define void @extractelement_int_lmul(i32 %x) {
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_15 = extractelement <32 x i32> undef, i32 15
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_31 = extractelement <32 x i32> undef, i32 31
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32_63 = extractelement <64 x i32> undef, i32 63
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i8 = extractelement <128 x i8> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v256i8 = extractelement <256 x i8> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32 = extractelement <32 x i32> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32 = extractelement <64 x i32> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i8 = extractelement <128 x i8> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %v256i8 = extractelement <256 x i8> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i32 = extractelement <32 x i32> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %v64i32 = extractelement <64 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64ZVE64X-LABEL: 'extractelement_int_lmul'
@@ -926,10 +926,10 @@ define void @extractelement_int_lmul(i32 %x) {
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_15 = extractelement <32 x i32> undef, i32 15
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_31 = extractelement <32 x i32> undef, i32 31
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32_63 = extractelement <64 x i32> undef, i32 63
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i8 = extractelement <128 x i8> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v256i8 = extractelement <256 x i8> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32 = extractelement <32 x i32> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32 = extractelement <64 x i32> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i8 = extractelement <128 x i8> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %v256i8 = extractelement <256 x i8> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i32 = extractelement <32 x i32> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %v64i32 = extractelement <64 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v128i8_31 = extractelement <128 x i8> undef, i32 31
@@ -1027,7 +1027,7 @@ define void @extractelement_fp(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_x = extractelement <vscale x 8 x half> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16_x = extractelement <vscale x 16 x half> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32f16_x = extractelement <vscale x 32 x half> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_x = extractelement <vscale x 64 x half> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64f16_x = extractelement <vscale x 64 x half> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_x = extractelement <2 x float> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_x = extractelement <4 x float> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_x = extractelement <8 x float> undef, i32 %x
@@ -1037,7 +1037,7 @@ define void @extractelement_fp(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_x = extractelement <vscale x 4 x float> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_x = extractelement <vscale x 8 x float> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_x = extractelement <vscale x 16 x float> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_x = extractelement <vscale x 32 x float> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32f32_x = extractelement <vscale x 32 x float> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_x = extractelement <2 x double> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_x = extractelement <4 x double> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_x = extractelement <8 x double> undef, i32 %x
@@ -1045,7 +1045,7 @@ define void @extractelement_fp(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_x = extractelement <vscale x 2 x double> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_x = extractelement <vscale x 4 x double> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f64_x = extractelement <vscale x 8 x double> undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_x = extractelement <vscale x 16 x double> undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16f64_x = extractelement <vscale x 16 x double> undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64V-LABEL: 'extractelement_fp'
@@ -1120,7 +1120,7 @@ define void @extractelement_fp(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_x = extractelement <vscale x 8 x half> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f16_x = extractelement <vscale x 16 x half> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32f16_x = extractelement <vscale x 32 x half> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_x = extractelement <vscale x 64 x half> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64f16_x = extractelement <vscale x 64 x half> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_x = extractelement <2 x float> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_x = extractelement <4 x float> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_x = extractelement <8 x float> undef, i32 %x
@@ -1130,7 +1130,7 @@ define void @extractelement_fp(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_x = extractelement <vscale x 4 x float> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_x = extractelement <vscale x 8 x float> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_x = extractelement <vscale x 16 x float> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_x = extractelement <vscale x 32 x float> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32f32_x = extractelement <vscale x 32 x float> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_x = extractelement <2 x double> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_x = extractelement <4 x double> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_x = extractelement <8 x double> undef, i32 %x
@@ -1138,7 +1138,7 @@ define void @extractelement_fp(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_x = extractelement <vscale x 2 x double> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_x = extractelement <vscale x 4 x double> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f64_x = extractelement <vscale x 8 x double> undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_x = extractelement <vscale x 16 x double> undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16f64_x = extractelement <vscale x 16 x double> undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV32ZVE64X-LABEL: 'extractelement_fp'
@@ -1472,7 +1472,7 @@ define void @extractelement_int_nonpoweroftwo(i32 %x) {
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i32 = extractelement <3 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v7i32 = extractelement <7 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v15i32 = extractelement <15 x i32> undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v31i32 = extractelement <31 x i32> undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v31i32 = extractelement <31 x i32> undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v31i32_0 = extractelement <31 x i32> undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -1484,7 +1484,7 @@ define void @extractelement_int_nonpoweroftwo(i32 %x) {
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i32 = extractelement <3 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v7i32 = extractelement <7 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v15i32 = extractelement <15 x i32> undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v31i32 = extractelement <31 x i32> undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v31i32 = extractelement <31 x i32> undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v31i32_0 = extractelement <31 x i32> undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;

>From f50fc42407c3f61e8c8ced2fd1a38b5f07d2fcb8 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 2 Dec 2024 14:09:58 -0800
Subject: [PATCH 2/4] Fixup! Typo.

---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 20ca80aedab62c..8d0e3009aff6a8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1946,8 +1946,8 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
   }
 
-  // When the vector need to split into multiple register groups and the index
-  // exceed single vector resgister group, we need to extract the element via
+  // When the vector needs to split into multiple register groups and the index
+  // exceeds single vector register group, we need to extract the element via
   // stack.
   if (Opcode == Instruction::ExtractElement && LT.first > 1 &&
       ((Index == -1U) || (Index > LT.second.getVectorMinNumElements() &&

>From d8b6466acb15e26e2ad092f6dbb09b4503638e0a Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 3 Dec 2024 19:32:09 -0800
Subject: [PATCH 3/4] Support cost of insertelt.

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 25 ++++--
 .../CostModel/RISCV/rvv-insertelement.ll      | 84 +++++++++----------
 2 files changed, 60 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 8d0e3009aff6a8..2bf8c08b03e5a8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1947,18 +1947,29 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   }
 
   // When the vector needs to split into multiple register groups and the index
-  // exceeds single vector register group, we need to extract the element via
-  // stack.
-  if (Opcode == Instruction::ExtractElement && LT.first > 1 &&
-      ((Index == -1U) || (Index > LT.second.getVectorMinNumElements() &&
+  // exceeds a single vector register group, we need to insert/extract the
+  // element via the stack.
+  if (LT.first > 1 &&
+      ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                           LT.second.isScalableVector()))) {
     Type *ScalarType = Val->getScalarType();
     Align VecAlign = DL.getPrefTypeAlign(Val);
     Align SclAlign = DL.getPrefTypeAlign(ScalarType);
+
     // Store all split vectors into stack and load the target element.
-    return LT.first *
-               getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, CostKind) +
-           getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
+    if (Opcode == Instruction::ExtractElement)
+      return LT.first * getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
+                                        CostKind) +
+             getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
+                             CostKind);
+
+    // Store all split vectors to the stack, write the target element into its
+    // slot, and load the vectors back.
+    return LT.first * (getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
+                                       CostKind) +
+                       getMemoryOpCost(Instruction::Load, Val, VecAlign, 0,
+                                       CostKind)) +
+           getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
                            CostKind);
   }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
index c240a75066b10c..02efb4f6646b61 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
@@ -139,7 +139,7 @@ define void @insertelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_x = insertelement <vscale x 16 x i8> undef, i8 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_x = insertelement <vscale x 32 x i8> undef, i8 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_x = insertelement <vscale x 64 x i8> undef, i8 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv128i8_x = insertelement <vscale x 128 x i8> undef, i8 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv128i8_x = insertelement <vscale x 128 x i8> undef, i8 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_x = insertelement <2 x i16> undef, i16 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_x = insertelement <4 x i16> undef, i16 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_x = insertelement <8 x i16> undef, i16 undef, i32 %x
@@ -151,7 +151,7 @@ define void @insertelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i16_x = insertelement <vscale x 8 x i16> undef, i16 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_x = insertelement <vscale x 16 x i16> undef, i16 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_x = insertelement <vscale x 32 x i16> undef, i16 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i16_x = insertelement <vscale x 64 x i16> undef, i16 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv64i16_x = insertelement <vscale x 64 x i16> undef, i16 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_x = insertelement <2 x i32> undef, i32 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_x = insertelement <4 x i32> undef, i32 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_x = insertelement <8 x i32> undef, i32 undef, i32 %x
@@ -161,7 +161,7 @@ define void @insertelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i32_x = insertelement <vscale x 4 x i32> undef, i32 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_x = insertelement <vscale x 8 x i32> undef, i32 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_x = insertelement <vscale x 16 x i32> undef, i32 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_x = insertelement <vscale x 32 x i32> undef, i32 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv32i32_x = insertelement <vscale x 32 x i32> undef, i32 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i64_x = insertelement <2 x i64> undef, i64 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_x = insertelement <4 x i64> undef, i64 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_x = insertelement <8 x i64> undef, i64 undef, i32 %x
@@ -169,7 +169,7 @@ define void @insertelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv2i64_x = insertelement <vscale x 2 x i64> undef, i64 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv4i64_x = insertelement <vscale x 4 x i64> undef, i64 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv8i64_x = insertelement <vscale x 8 x i64> undef, i64 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_x = insertelement <vscale x 16 x i64> undef, i64 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %nxv16i64_x = insertelement <vscale x 16 x i64> undef, i64 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64V-LABEL: 'insertelement_int'
@@ -304,7 +304,7 @@ define void @insertelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_x = insertelement <vscale x 16 x i8> undef, i8 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_x = insertelement <vscale x 32 x i8> undef, i8 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_x = insertelement <vscale x 64 x i8> undef, i8 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv128i8_x = insertelement <vscale x 128 x i8> undef, i8 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv128i8_x = insertelement <vscale x 128 x i8> undef, i8 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_x = insertelement <2 x i16> undef, i16 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_x = insertelement <4 x i16> undef, i16 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_x = insertelement <8 x i16> undef, i16 undef, i32 %x
@@ -316,7 +316,7 @@ define void @insertelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i16_x = insertelement <vscale x 8 x i16> undef, i16 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_x = insertelement <vscale x 16 x i16> undef, i16 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_x = insertelement <vscale x 32 x i16> undef, i16 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i16_x = insertelement <vscale x 64 x i16> undef, i16 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv64i16_x = insertelement <vscale x 64 x i16> undef, i16 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_x = insertelement <2 x i32> undef, i32 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_x = insertelement <4 x i32> undef, i32 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_x = insertelement <8 x i32> undef, i32 undef, i32 %x
@@ -326,7 +326,7 @@ define void @insertelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i32_x = insertelement <vscale x 4 x i32> undef, i32 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_x = insertelement <vscale x 8 x i32> undef, i32 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_x = insertelement <vscale x 16 x i32> undef, i32 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_x = insertelement <vscale x 32 x i32> undef, i32 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv32i32_x = insertelement <vscale x 32 x i32> undef, i32 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_x = insertelement <2 x i64> undef, i64 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_x = insertelement <4 x i64> undef, i64 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_x = insertelement <8 x i64> undef, i64 undef, i32 %x
@@ -334,7 +334,7 @@ define void @insertelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i64_x = insertelement <vscale x 2 x i64> undef, i64 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_x = insertelement <vscale x 4 x i64> undef, i64 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i64_x = insertelement <vscale x 8 x i64> undef, i64 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_x = insertelement <vscale x 16 x i64> undef, i64 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv16i64_x = insertelement <vscale x 16 x i64> undef, i64 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV32ZVE64X-LABEL: 'insertelement_int'
@@ -462,44 +462,44 @@ define void @insertelement_int(i32 %x) {
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_x = insertelement <16 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_x = insertelement <32 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i8_x = insertelement <64 x i8> undef, i8 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8_x = insertelement <128 x i8> undef, i8 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v128i8_x = insertelement <128 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i8_x = insertelement <vscale x 2 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i8_x = insertelement <vscale x 4 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i8_x = insertelement <vscale x 8 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_x = insertelement <vscale x 16 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_x = insertelement <vscale x 32 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_x = insertelement <vscale x 64 x i8> undef, i8 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv128i8_x = insertelement <vscale x 128 x i8> undef, i8 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv128i8_x = insertelement <vscale x 128 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_x = insertelement <2 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_x = insertelement <4 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_x = insertelement <8 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_x = insertelement <16 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_x = insertelement <32 x i16> undef, i16 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i16_x = insertelement <64 x i16> undef, i16 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v64i16_x = insertelement <64 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i16_x = insertelement <vscale x 2 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i16_x = insertelement <vscale x 4 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i16_x = insertelement <vscale x 8 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_x = insertelement <vscale x 16 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_x = insertelement <vscale x 32 x i16> undef, i16 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i16_x = insertelement <vscale x 64 x i16> undef, i16 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv64i16_x = insertelement <vscale x 64 x i16> undef, i16 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_x = insertelement <2 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_x = insertelement <4 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_x = insertelement <8 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_x = insertelement <16 x i32> undef, i32 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32_x = insertelement <32 x i32> undef, i32 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v32i32_x = insertelement <32 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i32_x = insertelement <vscale x 2 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i32_x = insertelement <vscale x 4 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_x = insertelement <vscale x 8 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_x = insertelement <vscale x 16 x i32> undef, i32 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_x = insertelement <vscale x 32 x i32> undef, i32 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv32i32_x = insertelement <vscale x 32 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i64_x = insertelement <2 x i64> undef, i64 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i64_x = insertelement <4 x i64> undef, i64 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i64_x = insertelement <8 x i64> undef, i64 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i64_x = insertelement <16 x i64> undef, i64 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v16i64_x = insertelement <16 x i64> undef, i64 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv2i64_x = insertelement <vscale x 2 x i64> undef, i64 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv4i64_x = insertelement <vscale x 4 x i64> undef, i64 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv8i64_x = insertelement <vscale x 8 x i64> undef, i64 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_x = insertelement <vscale x 16 x i64> undef, i64 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %nxv16i64_x = insertelement <vscale x 16 x i64> undef, i64 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64ZVE64X-LABEL: 'insertelement_int'
@@ -627,44 +627,44 @@ define void @insertelement_int(i32 %x) {
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i8_x = insertelement <16 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i8_x = insertelement <32 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i8_x = insertelement <64 x i8> undef, i8 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8_x = insertelement <128 x i8> undef, i8 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v128i8_x = insertelement <128 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i8_x = insertelement <vscale x 2 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i8_x = insertelement <vscale x 4 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i8_x = insertelement <vscale x 8 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i8_x = insertelement <vscale x 16 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i8_x = insertelement <vscale x 32 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_x = insertelement <vscale x 64 x i8> undef, i8 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv128i8_x = insertelement <vscale x 128 x i8> undef, i8 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv128i8_x = insertelement <vscale x 128 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_x = insertelement <2 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_x = insertelement <4 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i16_x = insertelement <8 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i16_x = insertelement <16 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i16_x = insertelement <32 x i16> undef, i16 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i16_x = insertelement <64 x i16> undef, i16 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v64i16_x = insertelement <64 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i16_x = insertelement <vscale x 2 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i16_x = insertelement <vscale x 4 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i16_x = insertelement <vscale x 8 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i16_x = insertelement <vscale x 16 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_x = insertelement <vscale x 32 x i16> undef, i16 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i16_x = insertelement <vscale x 64 x i16> undef, i16 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv64i16_x = insertelement <vscale x 64 x i16> undef, i16 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_x = insertelement <2 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_x = insertelement <4 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i32_x = insertelement <8 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i32_x = insertelement <16 x i32> undef, i32 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32_x = insertelement <32 x i32> undef, i32 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v32i32_x = insertelement <32 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i32_x = insertelement <vscale x 2 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i32_x = insertelement <vscale x 4 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_x = insertelement <vscale x 8 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_x = insertelement <vscale x 16 x i32> undef, i32 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_x = insertelement <vscale x 32 x i32> undef, i32 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv32i32_x = insertelement <vscale x 32 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_x = insertelement <2 x i64> undef, i64 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_x = insertelement <4 x i64> undef, i64 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_x = insertelement <8 x i64> undef, i64 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i64_x = insertelement <16 x i64> undef, i64 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v16i64_x = insertelement <16 x i64> undef, i64 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i64_x = insertelement <vscale x 2 x i64> undef, i64 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_x = insertelement <vscale x 4 x i64> undef, i64 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i64_x = insertelement <vscale x 8 x i64> undef, i64 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_x = insertelement <vscale x 16 x i64> undef, i64 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv16i64_x = insertelement <vscale x 16 x i64> undef, i64 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
@@ -876,9 +876,9 @@ define void @insertelement_int_lmul(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_31 = insertelement <32 x i32> undef, i32 undef, i32 31
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32_63 = insertelement <64 x i32> undef, i32 undef, i32 63
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8 = insertelement <128 x i8> undef, i8 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v256i8 = insertelement <256 x i8> undef, i8 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v256i8 = insertelement <256 x i8> undef, i8 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32 = insertelement <32 x i32> undef, i32 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i32 = insertelement <64 x i32> undef, i32 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v64i32 = insertelement <64 x i32> undef, i32 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64V-LABEL: 'insertelement_int_lmul'
@@ -893,9 +893,9 @@ define void @insertelement_int_lmul(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_31 = insertelement <32 x i32> undef, i32 undef, i32 31
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32_63 = insertelement <64 x i32> undef, i32 undef, i32 63
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8 = insertelement <128 x i8> undef, i8 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v256i8 = insertelement <256 x i8> undef, i8 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v256i8 = insertelement <256 x i8> undef, i8 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32 = insertelement <32 x i32> undef, i32 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i32 = insertelement <64 x i32> undef, i32 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v64i32 = insertelement <64 x i32> undef, i32 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV32ZVE64X-LABEL: 'insertelement_int_lmul'
@@ -909,10 +909,10 @@ define void @insertelement_int_lmul(i32 %x) {
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_15 = insertelement <32 x i32> undef, i32 undef, i32 15
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_31 = insertelement <32 x i32> undef, i32 undef, i32 31
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32_63 = insertelement <64 x i32> undef, i32 undef, i32 63
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8 = insertelement <128 x i8> undef, i8 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v256i8 = insertelement <256 x i8> undef, i8 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32 = insertelement <32 x i32> undef, i32 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i32 = insertelement <64 x i32> undef, i32 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v128i8 = insertelement <128 x i8> undef, i8 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 257 for instruction: %v256i8 = insertelement <256 x i8> undef, i8 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v32i32 = insertelement <32 x i32> undef, i32 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 257 for instruction: %v64i32 = insertelement <64 x i32> undef, i32 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64ZVE64X-LABEL: 'insertelement_int_lmul'
@@ -926,10 +926,10 @@ define void @insertelement_int_lmul(i32 %x) {
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_15 = insertelement <32 x i32> undef, i32 undef, i32 15
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_31 = insertelement <32 x i32> undef, i32 undef, i32 31
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i32_63 = insertelement <64 x i32> undef, i32 undef, i32 63
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8 = insertelement <128 x i8> undef, i8 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v256i8 = insertelement <256 x i8> undef, i8 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32 = insertelement <32 x i32> undef, i32 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i32 = insertelement <64 x i32> undef, i32 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v128i8 = insertelement <128 x i8> undef, i8 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 257 for instruction: %v256i8 = insertelement <256 x i8> undef, i8 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %v32i32 = insertelement <32 x i32> undef, i32 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 257 for instruction: %v64i32 = insertelement <64 x i32> undef, i32 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v128i8_31 = insertelement <128 x i8> undef, i8 undef, i32 31
@@ -1027,7 +1027,7 @@ define void @insertelement_fp(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_x = insertelement <vscale x 8 x half> undef, half undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_x = insertelement <vscale x 16 x half> undef, half undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_x = insertelement <vscale x 32 x half> undef, half undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_x = insertelement <vscale x 64 x half> undef, half undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv64f16_x = insertelement <vscale x 64 x half> undef, half undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_x = insertelement <2 x float> undef, float undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_x = insertelement <4 x float> undef, float undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_x = insertelement <8 x float> undef, float undef, i32 %x
@@ -1037,7 +1037,7 @@ define void @insertelement_fp(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_x = insertelement <vscale x 4 x float> undef, float undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_x = insertelement <vscale x 8 x float> undef, float undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_x = insertelement <vscale x 16 x float> undef, float undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f32_x = insertelement <vscale x 32 x float> undef, float undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv32f32_x = insertelement <vscale x 32 x float> undef, float undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_x = insertelement <2 x double> undef, double undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_x = insertelement <4 x double> undef, double undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_x = insertelement <8 x double> undef, double undef, i32 %x
@@ -1045,7 +1045,7 @@ define void @insertelement_fp(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_x = insertelement <vscale x 2 x double> undef, double undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_x = insertelement <vscale x 4 x double> undef, double undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_x = insertelement <vscale x 8 x double> undef, double undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f64_x = insertelement <vscale x 16 x double> undef, double undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv16f64_x = insertelement <vscale x 16 x double> undef, double undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64V-LABEL: 'insertelement_fp'
@@ -1120,7 +1120,7 @@ define void @insertelement_fp(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_x = insertelement <vscale x 8 x half> undef, half undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_x = insertelement <vscale x 16 x half> undef, half undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_x = insertelement <vscale x 32 x half> undef, half undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_x = insertelement <vscale x 64 x half> undef, half undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv64f16_x = insertelement <vscale x 64 x half> undef, half undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_x = insertelement <2 x float> undef, float undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_x = insertelement <4 x float> undef, float undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_x = insertelement <8 x float> undef, float undef, i32 %x
@@ -1130,7 +1130,7 @@ define void @insertelement_fp(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_x = insertelement <vscale x 4 x float> undef, float undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_x = insertelement <vscale x 8 x float> undef, float undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_x = insertelement <vscale x 16 x float> undef, float undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f32_x = insertelement <vscale x 32 x float> undef, float undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv32f32_x = insertelement <vscale x 32 x float> undef, float undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_x = insertelement <2 x double> undef, double undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_x = insertelement <4 x double> undef, double undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_x = insertelement <8 x double> undef, double undef, i32 %x
@@ -1138,7 +1138,7 @@ define void @insertelement_fp(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_x = insertelement <vscale x 2 x double> undef, double undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_x = insertelement <vscale x 4 x double> undef, double undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_x = insertelement <vscale x 8 x double> undef, double undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f64_x = insertelement <vscale x 16 x double> undef, double undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %nxv16f64_x = insertelement <vscale x 16 x double> undef, double undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV32ZVE64X-LABEL: 'insertelement_fp'

>From c41b47e7ba668296c9c5ed87a78ba616671b8cdd Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 3 Dec 2024 22:54:32 -0800
Subject: [PATCH 4/4] Fixup! Early exit.

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 48 +++++++++----------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2bf8c08b03e5a8..9af75f4023b512 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1922,30 +1922,6 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
       SlideCost = 1; // With a constant index, we do not need to use addi.
   }
 
-  // Extract i64 in the target that has XLEN=32 need more instruction.
-  if (Val->getScalarType()->isIntegerTy() &&
-      ST->getXLen() < Val->getScalarSizeInBits()) {
-    // For extractelement, we need the following instructions:
-    // vsetivli zero, 1, e64, m1, ta, mu (not count)
-    // vslidedown.vx v8, v8, a0
-    // vmv.x.s a0, v8
-    // li a1, 32
-    // vsrl.vx v8, v8, a1
-    // vmv.x.s a1, v8
-
-    // For insertelement, we need the following instructions:
-    // vsetivli zero, 2, e32, m4, ta, mu (not count)
-    // vmv.v.i v12, 0
-    // vslide1up.vx v16, v12, a1
-    // vslide1up.vx v12, v16, a0
-    // addi a0, a2, 1
-    // vsetvli zero, a0, e64, m4, tu, mu (not count)
-    // vslideup.vx v8, v12, a2
-
-    // TODO: should we count these special vsetvlis?
-    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
-  }
-
   // When the vector needs to split into multiple register groups and the index
   // exceeds single vector register group, we need to insert/extract the element
   // via stack.
@@ -1973,6 +1949,30 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                            CostKind);
   }
 
+  // Extracting/inserting an i64 element on a target with XLEN=32 needs more
+  // instructions.
+  if (Val->getScalarType()->isIntegerTy() &&
+      ST->getXLen() < Val->getScalarSizeInBits()) {
+    // For extractelement, we need the following instructions:
+    // vsetivli zero, 1, e64, m1, ta, mu (not counted)
+    // vslidedown.vx v8, v8, a0
+    // vmv.x.s a0, v8
+    // li a1, 32
+    // vsrl.vx v8, v8, a1
+    // vmv.x.s a1, v8
+
+    // For insertelement, we need the following instructions:
+    // vsetivli zero, 2, e32, m4, ta, mu (not counted)
+    // vmv.v.i v12, 0
+    // vslide1up.vx v16, v12, a1
+    // vslide1up.vx v12, v16, a0
+    // addi a0, a2, 1
+    // vsetvli zero, a0, e64, m4, tu, mu (not counted)
+    // vslideup.vx v8, v12, a2
+
+    // TODO: should we count these special vsetvlis?
+    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
+  }
+
   return BaseCost + SlideCost;
 }
 



More information about the llvm-commits mailing list