[llvm] [InlineCost] Correct the default branch cost for the switch statement (PR #85160)

Quentin Dian via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 15 06:28:40 PDT 2024


https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/85160

>From 58622ef6755a02f97e5127bea29ed5b8812fe25e Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 17 Mar 2024 16:17:24 +0800
Subject: [PATCH 1/9] [InlineCost] Ignore default branch

---
 llvm/lib/Analysis/InlineCost.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index e55eaa55f8e947..8b495207fccc51 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -701,8 +701,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
 
   void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
                         bool DefaultDestUndefined) override {
-    if (!DefaultDestUndefined)
-      addCost(2 * InstrCost);
+    // if (!DefaultDestUndefined)
+    //   addCost(2 * InstrCost);
     // If suitable for a jump table, consider the cost for the table size and
     // branch to destination.
     // Maximum valid cost increased in this function.
@@ -1235,9 +1235,9 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
 
   void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
                         bool DefaultDestUndefined) override {
-    if (!DefaultDestUndefined)
-      increment(InlineCostFeatureIndex::switch_default_dest_penalty,
-                SwitchDefaultDestCostMultiplier * InstrCost);
+    // if (!DefaultDestUndefined)
+    //   increment(InlineCostFeatureIndex::switch_default_dest_penalty,
+    //             SwitchDefaultDestCostMultiplier * InstrCost);
 
     if (JumpTableSize) {
       int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +

>From d69f4bc19bc04ed0b3c056938b711ea7d7712ef0 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Thu, 22 Feb 2024 20:31:44 +0800
Subject: [PATCH 2/9] [InlineCost] Consider the default branch when
 transforming to comparison

---
 llvm/lib/Analysis/InlineCost.cpp | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 8b495207fccc51..ffffa15f3dd730 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -701,12 +701,14 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
 
   void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
                         bool DefaultDestUndefined) override {
-    // if (!DefaultDestUndefined)
-    //   addCost(2 * InstrCost);
     // If suitable for a jump table, consider the cost for the table size and
     // branch to destination.
     // Maximum valid cost increased in this function.
     if (JumpTableSize) {
+      // Suppose a default branch includes one compare and one conditional
+      // branch if it's reachable.
+      if (!DefaultDestUndefined)
+        addCost(2 * InstrCost);
       int64_t JTCost =
           static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;
       addCost(JTCost);
@@ -715,10 +717,13 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
 
     if (NumCaseCluster <= 3) {
       // Suppose a comparison includes one compare and one conditional branch.
-      addCost(NumCaseCluster * 2 * InstrCost);
+      // We can reduce a set of instructions if the default branch is
+      // undefined.
+      addCost((NumCaseCluster - DefaultDestUndefined) * 2 * InstrCost);
       return;
     }
 
+    // FIXME: Consider the case when default branch is undefined.
     int64_t ExpectedNumberOfCompare =
         getExpectedNumberOfCompare(NumCaseCluster);
     int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;
@@ -1235,11 +1240,10 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
 
   void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
                         bool DefaultDestUndefined) override {
-    // if (!DefaultDestUndefined)
-    //   increment(InlineCostFeatureIndex::switch_default_dest_penalty,
-    //             SwitchDefaultDestCostMultiplier * InstrCost);
-
     if (JumpTableSize) {
+      if (!DefaultDestUndefined)
+        increment(InlineCostFeatureIndex::switch_default_dest_penalty,
+                  SwitchDefaultDestCostMultiplier * InstrCost);
       int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
                        JTCostMultiplier * InstrCost;
       increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
@@ -1248,10 +1252,12 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
 
     if (NumCaseCluster <= 3) {
       increment(InlineCostFeatureIndex::case_cluster_penalty,
-                NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
+                (NumCaseCluster - DefaultDestUndefined) *
+                    CaseClusterCostMultiplier * InstrCost);
       return;
     }
 
+    // FIXME: Consider the case when default branch is undefined.
     int64_t ExpectedNumberOfCompare =
         getExpectedNumberOfCompare(NumCaseCluster);
 

>From 9333f9171aedd73a15600a542b72c88fa8bd00d6 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Wed, 13 Mar 2024 22:04:22 +0800
Subject: [PATCH 3/9] [InlineCost] The jump table only requires a jump
 instruction

---
 llvm/lib/Analysis/InlineCost.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index ffffa15f3dd730..de5bad23bc728b 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -709,8 +709,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
       // branch if it's reachable.
       if (!DefaultDestUndefined)
         addCost(2 * InstrCost);
+      // The jump table only requires a jump instruction.
       int64_t JTCost =
-          static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;
+          static_cast<int64_t>(JumpTableSize) * InstrCost + InstrCost;
       addCost(JTCost);
       return;
     }
@@ -1157,7 +1158,7 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
   // FIXME: These constants are taken from the heuristic-based cost visitor.
   // These should be removed entirely in a later revision to avoid reliance on
   // heuristics in the ML inliner.
-  static constexpr int JTCostMultiplier = 4;
+  static constexpr int JTCostMultiplier = 1;
   static constexpr int CaseClusterCostMultiplier = 2;
   static constexpr int SwitchDefaultDestCostMultiplier = 2;
   static constexpr int SwitchCostMultiplier = 2;

>From 9f29ce41a6cfd512b910ced8ba603db1961e24ec Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 17 Mar 2024 16:49:53 +0800
Subject: [PATCH 4/9] [InlineCost] Reduce a comparison

---
 llvm/lib/Analysis/InlineCost.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index de5bad23bc728b..09611cd1c0ce73 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -536,8 +536,16 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 // Considering comparisons from leaf and non-leaf nodes, we can estimate the
 // number of comparisons in a simple closed form :
 //   n + n / 2 - 1 = n * 3 / 2 - 1
-int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
-  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
+int64_t getExpectedNumberOfCompare(int NumCaseCluster,
+                                   bool DefaultDestUndefined) {
+  int64_t ExpectedNumber = 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
+  // FIXME: The compare instruction count should be less than the branch count
+  // when default branch is undefined. But this will cause some performance
+  // regressions. At least, we can now try to remove a compare instruction.
+  if (DefaultDestUndefined) {
+    ExpectedNumber -= 1;
+  }
+  return ExpectedNumber;
 }
 
 /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
@@ -724,9 +732,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
       return;
     }
 
-    // FIXME: Consider the case when default branch is undefined.
     int64_t ExpectedNumberOfCompare =
-        getExpectedNumberOfCompare(NumCaseCluster);
+        getExpectedNumberOfCompare(NumCaseCluster, DefaultDestUndefined);
     int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;
 
     addCost(SwitchCost);
@@ -1258,9 +1265,8 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
       return;
     }
 
-    // FIXME: Consider the case when default branch is undefined.
     int64_t ExpectedNumberOfCompare =
-        getExpectedNumberOfCompare(NumCaseCluster);
+        getExpectedNumberOfCompare(NumCaseCluster, DefaultDestUndefined);
 
     int64_t SwitchCost =
         ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;

>From 9a7e12ca9ff9f666d3b96915a22123f9a2c623d2 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 17 Mar 2024 16:51:38 +0800
Subject: [PATCH 5/9] [InlineCost] Update test cases

---
 llvm/test/Transforms/Inline/inline-switch-default-2.ll | 2 +-
 llvm/test/Transforms/Inline/inline-switch-default.ll   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/Inline/inline-switch-default-2.ll b/llvm/test/Transforms/Inline/inline-switch-default-2.ll
index 8d3e24c798df82..1a648300ae3c1e 100644
--- a/llvm/test/Transforms/Inline/inline-switch-default-2.ll
+++ b/llvm/test/Transforms/Inline/inline-switch-default-2.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt %s -S -passes=inline -inline-threshold=21 | FileCheck %s
+; RUN: opt %s -S -passes=inline -inline-threshold=11 | FileCheck %s
 
 ; Check for scenarios without TTI.
 
diff --git a/llvm/test/Transforms/Inline/inline-switch-default.ll b/llvm/test/Transforms/Inline/inline-switch-default.ll
index 44f1304e82dff0..6a50820aad3a7d 100644
--- a/llvm/test/Transforms/Inline/inline-switch-default.ll
+++ b/llvm/test/Transforms/Inline/inline-switch-default.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt %s -S -passes=inline -inline-threshold=26 -min-jump-table-entries=4 | FileCheck %s -check-prefix=LOOKUPTABLE
-; RUN: opt %s -S -passes=inline -inline-threshold=21 -min-jump-table-entries=5 | FileCheck %s -check-prefix=SWITCH
+; RUN: opt %s -S -passes=inline -inline-threshold=16 -min-jump-table-entries=4 | FileCheck %s -check-prefix=LOOKUPTABLE
+; RUN: opt %s -S -passes=inline -inline-threshold=11 -min-jump-table-entries=5 | FileCheck %s -check-prefix=SWITCH
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

>From dc2c2faa82d3d7b998680267a79895eb4969e6fd Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 17 Mar 2024 18:05:01 +0800
Subject: [PATCH 6/9] [perf experiment] Update the number of comparisons for
 the default branch

---
 llvm/lib/Analysis/InlineCost.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 09611cd1c0ce73..9d29d5765c1915 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -538,14 +538,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 //   n + n / 2 - 1 = n * 3 / 2 - 1
 int64_t getExpectedNumberOfCompare(int NumCaseCluster,
                                    bool DefaultDestUndefined) {
-  int64_t ExpectedNumber = 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
-  // FIXME: The compare instruction count should be less than the branch count
-  // when default branch is undefined. But this will cause some performance
-  // regressions. At least, we can now try to remove a compare instruction.
+  // The compare instruction count should be less than the branch count
+  // when default branch is undefined.
   if (DefaultDestUndefined) {
-    ExpectedNumber -= 1;
+    return static_cast<int64_t>(NumCaseCluster) - 1;
   }
-  return ExpectedNumber;
+  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
 }
 
 /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note

>From 01838cc6223ceb3c1e0f675865d30aea32db3f71 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Tue, 19 Mar 2024 22:23:32 +0800
Subject: [PATCH 7/9] Suppose a jump table requires one load and one jump
 instruction

---
 llvm/lib/Analysis/InlineCost.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 9d29d5765c1915..1ea028c2dafc8f 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -715,9 +715,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
       // branch if it's reachable.
       if (!DefaultDestUndefined)
         addCost(2 * InstrCost);
-      // The jump table only requires a jump instruction.
+      // Suppose a jump table requires one load and one jump instruction.
       int64_t JTCost =
-          static_cast<int64_t>(JumpTableSize) * InstrCost + InstrCost;
+          static_cast<int64_t>(JumpTableSize) * InstrCost + 2 * InstrCost;
       addCost(JTCost);
       return;
     }
@@ -1163,7 +1163,7 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
   // FIXME: These constants are taken from the heuristic-based cost visitor.
   // These should be removed entirely in a later revision to avoid reliance on
   // heuristics in the ML inliner.
-  static constexpr int JTCostMultiplier = 1;
+  static constexpr int JTCostMultiplier = 2;
   static constexpr int CaseClusterCostMultiplier = 2;
   static constexpr int SwitchDefaultDestCostMultiplier = 2;
   static constexpr int SwitchCostMultiplier = 2;

>From 8f0ac66ff1ac06de532894564cbe33e6b875ed69 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 23 Mar 2024 13:07:29 +0800
Subject: [PATCH 8/9] Revert "[perf experiment] Update the number of
 comparisons for the default branch"

This reverts commit dc2c2faa82d3d7b998680267a79895eb4969e6fd.
---
 llvm/lib/Analysis/InlineCost.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 1ea028c2dafc8f..1472950707bf70 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -538,12 +538,14 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 //   n + n / 2 - 1 = n * 3 / 2 - 1
 int64_t getExpectedNumberOfCompare(int NumCaseCluster,
                                    bool DefaultDestUndefined) {
-  // The compare instruction count should be less than the branch count
-  // when default branch is undefined.
+  int64_t ExpectedNumber = 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
+  // FIXME: The compare instruction count should be less than the branch count
+  // when default branch is undefined. But this will cause some performance
+  // regressions. At least, we can now try to remove a compare instruction.
   if (DefaultDestUndefined) {
-    return static_cast<int64_t>(NumCaseCluster) - 1;
+    ExpectedNumber -= 1;
   }
-  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
+  return ExpectedNumber;
 }
 
 /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note

>From c786e6b94c62340e80397e02d3fb0dfdb14bc0f6 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 23 Mar 2024 13:10:12 +0800
Subject: [PATCH 9/9] Update comment

---
 llvm/lib/Analysis/InlineCost.cpp | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 1472950707bf70..ed83177d294b55 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -536,16 +536,10 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 // Considering comparisons from leaf and non-leaf nodes, we can estimate the
 // number of comparisons in a simple closed form :
 //   n + n / 2 - 1 = n * 3 / 2 - 1
-int64_t getExpectedNumberOfCompare(int NumCaseCluster,
-                                   bool DefaultDestUndefined) {
-  int64_t ExpectedNumber = 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
+int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
   // FIXME: The compare instruction count should be less than the branch count
-  // when default branch is undefined. But this will cause some performance
-  // regressions. At least, we can now try to remove a compare instruction.
-  if (DefaultDestUndefined) {
-    ExpectedNumber -= 1;
-  }
-  return ExpectedNumber;
+  // when default branch is undefined: https://llvm.godbolt.org/z/x6ETdfY79.
+  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
 }
 
 /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
@@ -733,7 +727,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
     }
 
     int64_t ExpectedNumberOfCompare =
-        getExpectedNumberOfCompare(NumCaseCluster, DefaultDestUndefined);
+        getExpectedNumberOfCompare(NumCaseCluster);
     int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;
 
     addCost(SwitchCost);
@@ -1266,7 +1260,7 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
     }
 
     int64_t ExpectedNumberOfCompare =
-        getExpectedNumberOfCompare(NumCaseCluster, DefaultDestUndefined);
+        getExpectedNumberOfCompare(NumCaseCluster);
 
     int64_t SwitchCost =
         ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;



More information about the llvm-commits mailing list