[llvm] 3d3d8fe - [AArch64] Improve fsh(l|r) cost modeling if 3rd arg is constant.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 20 10:20:15 PDT 2023


Author: Zain Jaffal
Date: 2023-04-20T18:20:01+01:00
New Revision: 3d3d8fef07926f01cc08bba65728808f4fc6e36d

URL: https://github.com/llvm/llvm-project/commit/3d3d8fef07926f01cc08bba65728808f4fc6e36d
DIFF: https://github.com/llvm/llvm-project/commit/3d3d8fef07926f01cc08bba65728808f4fc6e36d.diff

LOG: [AArch64] Improve fsh(l|r) cost modeling if 3rd arg is constant.

In that case, the cost for i32 and i64 should be 1 (a single EXTR
instruction). For v4i32 and v2i64 it should be 3 (USHR + SHL + ORR).

Other sizes smaller than 64 bits require an extra instruction for
conversion to i32/i64.

This recovers a SLP regression revealed by D140392.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D147322

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/fshl.ll
    llvm/test/Analysis/CostModel/AArch64/fshr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 55a0ff15213d..6e3fd7358bb3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -523,6 +523,52 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     }
     break;
   }
+  case Intrinsic::fshl:
+  case Intrinsic::fshr: {
+    if (ICA.getArgs().empty())
+      break;
+
+    // TODO: Add handling for fshl where third argument is not a constant.
+    const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(ICA.getArgs()[2]);
+    if (!OpInfoZ.isConstant())
+      break;
+
+    const auto LegalisationCost = getTypeLegalizationCost(RetTy);
+    if (OpInfoZ.isUniform()) {
+      // FIXME: The costs could be lower if the codegen is better.
+      static const CostTblEntry FshlTbl[] = {
+          {Intrinsic::fshl, MVT::v4i32, 3}, // ushr + shl + orr
+          {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
+          {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
+          {Intrinsic::fshl, MVT::v8i8, 4},  {Intrinsic::fshl, MVT::v4i16, 4}};
+      // Costs for both fshl & fshr are the same, so just pass Intrinsic::fshl
+      // to avoid having to duplicate the costs.
+      const auto *Entry =
+          CostTableLookup(FshlTbl, Intrinsic::fshl, LegalisationCost.second);
+      if (Entry)
+        return LegalisationCost.first * Entry->Cost;
+    }
+
+    auto TyL = getTypeLegalizationCost(RetTy);
+    if (!RetTy->isIntegerTy())
+      break;
+
+    // Estimate cost manually, as types like i8 and i16 will get promoted to
+    // i32 and CostTableLookup will ignore the extra conversion cost.
+    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
+                       RetTy->getScalarSizeInBits() < 64) ||
+                      (RetTy->getScalarSizeInBits() % 64 != 0);
+    unsigned ExtraCost = HigherCost ? 1 : 0;
+    if (RetTy->getScalarSizeInBits() == 32 ||
+        RetTy->getScalarSizeInBits() == 64)
+      ExtraCost = 0; // fhsl/fshr for i32 and i64 can be lowered to a single
+                     // extr instruction.
+    else if (HigherCost)
+      ExtraCost = 1;
+    else
+      break;
+    return TyL.first + ExtraCost;
+  }
   default:
     break;
   }

diff  --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 9d8e0f7bce4d..6d4557cffd85 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define i8 @fshl_i8_3rd_arg_const(i8 %a, i8 %b) {
 ; CHECK-LABEL: 'fshl_i8_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
 ;
 entry:
@@ -27,7 +27,7 @@ declare i8 @llvm.fshl.i8(i8, i8, i8)
 
 define i16 @fshl_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: 'fshl_i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %fshl
 ;
 entry:
@@ -39,7 +39,7 @@ declare i16 @llvm.fshl.i16(i16, i16, i16)
 
 define i32 @fshl_i32_3rd_arg_const(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'fshl_i32_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
 ;
 entry:
@@ -61,7 +61,7 @@ declare i32 @llvm.fshl.i32(i32, i32, i32)
 
 define i64 @fshl_i64_3rd_arg_const(i64 %a, i64 %b) {
 ; CHECK-LABEL: 'fshl_i64_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
 ;
 entry:
@@ -83,7 +83,7 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
 
 define i19 @fshl_i19(i19 %a, i19 %b) {
 ; CHECK-LABEL: 'fshl_i19'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i19 %fshl
 ;
 entry:
@@ -96,7 +96,7 @@ declare i19 @llvm.fshl.i19(i19, i19, i19)
 
 define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
 ;
 entry:
@@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
 
 define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
 ;
 entry:
@@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
 
 define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
 ;
 entry:
@@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
 ;
 entry:
@@ -247,7 +247,7 @@ declare <2 x i66> @llvm.fshl.v4i66(<2 x i66>, <2 x i66>, <2 x i66>)
 
 define i66 @fshl_i66(i66 %a, i66 %b) {
 ; CHECK-LABEL: 'fshl_i66'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i66 %fshl
 ;
 entry:

diff  --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index fa84f4259f18..ab40c287cb5e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define i8 @fshr_i8_3rd_arg_const(i8 %a, i8 %b) {
 ; CHECK-LABEL: 'fshr_i8_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr
 ;
 entry:
@@ -27,7 +27,7 @@ declare i8 @llvm.fshr.i8(i8, i8, i8)
 
 define i16 @fshr_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: 'fshr_i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %fshr
 ;
 entry:
@@ -39,7 +39,7 @@ declare i16 @llvm.fshr.i16(i16, i16, i16)
 
 define i32 @fshr_i32_3rd_arg_const(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'fshr_i32_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr
 ;
 entry:
@@ -61,7 +61,7 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
 
 define i64 @fshr_i64_3rd_arg_const(i64 %a, i64 %b) {
 ; CHECK-LABEL: 'fshr_i64_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
 ;
 entry:
@@ -83,7 +83,7 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
 
 define i19 @fshr_i19(i19 %a, i19 %b) {
 ; CHECK-LABEL: 'fshr_i19'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i19 %fshr
 ;
 entry:
@@ -96,7 +96,7 @@ declare i19 @llvm.fshr.i19(i19, i19, i19)
 
 define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
 ;
 entry:
@@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
 
 define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
 ;
 entry:
@@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
 
 define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
 ;
 entry:
@@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
 ;
 entry:
@@ -247,7 +247,7 @@ declare <2 x i66> @llvm.fshr.v4i66(<2 x i66>, <2 x i66>, <2 x i66>)
 
 define i66 @fshr_i66(i66 %a, i66 %b) {
 ; CHECK-LABEL: 'fshr_i66'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i66 %fshr
 ;
 entry:


        


More information about the llvm-commits mailing list