[llvm] [CostModel] Account for power-2 urem in funnel shift costs (PR #127037)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 01:55:13 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
As can be seen in https://godbolt.org/z/qvMqY79cK, a urem by a power-of-2 constant will be code-generated as an And with a mask. The cost model for funnel shifts tries to account for that by passing OP_PowerOf2 as the operand info for the second operand. As far as I can tell, though, returning a lower cost for a urem with an OP_PowerOf2 operand is only implemented on X86.
This patch short-cuts that by calling getArithmeticInstrCost(And, ..) directly when we know the type size is a power of 2. This is an alternative to the patch in #<!-- -->126912, which is a more general solution for power-of-2 udiv/urem costs; this patch more narrowly fixes just funnel shifts.
---
Patch is 20.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127037.diff
5 Files Affected:
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+9-7)
- (modified) llvm/test/Analysis/CostModel/AArch64/fshl.ll (+7-7)
- (modified) llvm/test/Analysis/CostModel/AArch64/fshr.ll (+7-7)
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+16-16)
- (modified) llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll (+4-4)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 339b83637fa8f..8ff276fd18301 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1891,10 +1891,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X);
const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y);
const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z);
- const TTI::OperandValueInfo OpInfoBW =
- {TTI::OK_UniformConstantValue,
- isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
- : TTI::OP_None};
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
@@ -1910,9 +1906,15 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
{OpInfoZ.Kind, TTI::OP_None});
// Non-constant shift amounts requires a modulo.
- if (!OpInfoZ.isConstant())
- Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
- CostKind, OpInfoZ, OpInfoBW);
+ if (!OpInfoZ.isConstant()) {
+ Cost += isPowerOf2_32(RetTy->getScalarSizeInBits())
+ ? thisT()->getArithmeticInstrCost(
+ BinaryOperator::And, RetTy, CostKind, OpInfoZ,
+ {TTI::OK_UniformConstantValue, TTI::OP_None})
+ : thisT()->getArithmeticInstrCost(
+ BinaryOperator::URem, RetTy, CostKind, OpInfoZ,
+ {TTI::OK_UniformConstantValue, TTI::OP_None});
+ }
// For non-rotates (X != Y) we must add shift-by-zero handling costs.
if (X != Y) {
Type *CondTy = RetTy->getWithNewBitWidth(1);
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 632f26dfa5382..317adc96a74b6 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -15,7 +15,7 @@ entry:
define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
; CHECK-LABEL: 'fshl_i8_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
;
entry:
@@ -49,7 +49,7 @@ entry:
define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: 'fshl_i32_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
;
entry:
@@ -71,7 +71,7 @@ entry:
define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: 'fshl_i64_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
;
entry:
@@ -116,7 +116,7 @@ entry:
define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
;
entry:
@@ -148,7 +148,7 @@ entry:
define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
;
entry:
@@ -180,7 +180,7 @@ entry:
define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
;
entry:
@@ -212,7 +212,7 @@ entry:
define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
;
entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index a0a579ae96a9b..14f1f996fa174 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -15,7 +15,7 @@ entry:
define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
; CHECK-LABEL: 'fshr_i8_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr
;
entry:
@@ -49,7 +49,7 @@ entry:
define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: 'fshr_i32_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr
;
entry:
@@ -71,7 +71,7 @@ entry:
define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: 'fshr_i64_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
;
entry:
@@ -116,7 +116,7 @@ entry:
define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
;
entry:
@@ -148,7 +148,7 @@ entry:
define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
;
entry:
@@ -180,7 +180,7 @@ entry:
define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
;
entry:
@@ -212,7 +212,7 @@ entry:
define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
;
entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 696dec91d93d2..b7ae41292ea00 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1011,17 +1011,17 @@ define void @get_lane_mask() #0 {
define void @fshr() #0 {
; CHECK-VSCALE-1-LABEL: 'fshr'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'fshr'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'fshr'
@@ -1040,17 +1040,17 @@ define void @fshr() #0 {
define void @fshl() #0 {
; CHECK-VSCALE-1-LABEL: 'fshl'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'fshl'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'fshl'
diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index 2823ab4b4f78e..66240c8255ad7 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -231,22 +231,22 @@ define void @ctlz(i32 %a, <16 x i32> %va) {
define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
; THRU-LABEL: 'fshl'
; THRU-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; THRU-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; THRU-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fshl'
; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; LATE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fshl'
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 229 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fshl'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/127037
More information about the llvm-commits
mailing list