[llvm] [AArch64] Improve vector funnel shift by constant costs. (PR #130044)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 6 01:59:11 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
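Codegen for vector funnel shifts (fshl/fshr) by a uniform constant amount has improved, so the AArch64 cost model can be lowered to match. The generated code is now a shl + usra pair, so the cost for each legal vector type drops from 3 or 4 to 2. An example can be seen in test cases such as: https://github.com/llvm/llvm-project/blob/7e5821bae80db3f3f0fe0d5f8ce62f79e548eed5/llvm/test/CodeGen/AArch64/fsh.ll#L3941.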
---
Full diff: https://github.com/llvm/llvm-project/pull/130044.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+4-5)
- (modified) llvm/test/Analysis/CostModel/AArch64/fshl.ll (+8-8)
- (modified) llvm/test/Analysis/CostModel/AArch64/fshr.ll (+8-8)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ba019e1a4ecd5..53c6a029e9287 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -884,12 +884,11 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
const auto LegalisationCost = getTypeLegalizationCost(RetTy);
if (OpInfoZ.isUniform()) {
- // FIXME: The costs could be lower if the codegen is better.
static const CostTblEntry FshlTbl[] = {
- {Intrinsic::fshl, MVT::v4i32, 3}, // ushr + shl + orr
- {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
- {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
- {Intrinsic::fshl, MVT::v8i8, 4}, {Intrinsic::fshl, MVT::v4i16, 4}};
+ {Intrinsic::fshl, MVT::v4i32, 2}, // shl + usra
+ {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
+ {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
+ {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
// Costs for both fshl & fshr are the same, so just pass Intrinsic::fshl
// to avoid having to duplicate the costs.
const auto *Entry =
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 8c6466ab470b4..c59b41157c304 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -129,11 +129,11 @@ declare i19 @llvm.fshl.i19(i19, i19, i19)
define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
; RECIP-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
-; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
;
; SIZE-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshl
;
entry:
@@ -173,11 +173,11 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
; RECIP-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
-; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
;
; SIZE-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshl
;
entry:
@@ -217,11 +217,11 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
; RECIP-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
-; RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
;
; SIZE-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshl
;
entry:
@@ -261,11 +261,11 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
; RECIP-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
-; RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
;
; SIZE-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshl
;
entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index 120b1c4c4c4ef..e4cea1ddf77a0 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -129,11 +129,11 @@ declare i19 @llvm.fshr.i19(i19, i19, i19)
define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
; RECIP-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
-; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
;
; SIZE-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshr
;
entry:
@@ -173,11 +173,11 @@ declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
; RECIP-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
-; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
;
; SIZE-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshr
;
entry:
@@ -217,11 +217,11 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
; RECIP-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
-; RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
;
; SIZE-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshr
;
entry:
@@ -261,11 +261,11 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
; RECIP-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
-; RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
;
; SIZE-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshr
;
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/130044
More information about the llvm-commits mailing list