[llvm] 0524534 - [FuncSpec] Enable specialization of literal constants.

Alexandros Lamprineas via llvm-commits llvm-commits at lists.llvm.org
Thu May 25 02:05:26 PDT 2023


Author: Alexandros Lamprineas
Date: 2023-05-25T09:55:46+01:00
New Revision: 0524534d5220da5ecb2cd424a46520184d2be366

URL: https://github.com/llvm/llvm-project/commit/0524534d5220da5ecb2cd424a46520184d2be366
DIFF: https://github.com/llvm/llvm-project/commit/0524534d5220da5ecb2cd424a46520184d2be366.diff

LOG: [FuncSpec] Enable specialization of literal constants.

To do so we have to tweak the cost model such that specialization
does not trigger excessively.

Differential Revision: https://reviews.llvm.org/D150649

Added: 
    llvm/test/Transforms/FunctionSpecialization/max-iters.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
    llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
    llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
    llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
    llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
    llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
    llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
    llvm/test/Transforms/FunctionSpecialization/global-rank.ll
    llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
    llvm/test/Transforms/FunctionSpecialization/literal-const.ll
    llvm/test/Transforms/FunctionSpecialization/noinline.ll
    llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
    llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
    llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp

Removed: 
    llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 349d5a7a08795..e9ddff01f728c 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -188,6 +188,8 @@ class FunctionSpecializer {
 
   bool run();
 
+  static unsigned getBlockFreqMultiplier();
+
   InstCostVisitor getInstCostVisitorFor(Function *F) {
     auto &BFI = (GetBFI)(*F);
     auto &TTI = (GetTTI)(*F);

diff  --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index a970253d9b1c8..a635d7b4d40aa 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -74,6 +74,22 @@ static cl::opt<bool> ForceSpecialization(
     "Force function specialization for every call site with a constant "
     "argument"));
 
+// Set to 2^3 to model three levels of if-else nest.
+static cl::opt<unsigned> BlockFreqMultiplier(
+    "funcspec-block-freq-multiplier", cl::init(8), cl::Hidden, cl::desc(
+    "Multiplier to scale block frequency of user instructions during "
+    "specialization bonus estimation"));
+
+static cl::opt<unsigned> MinEntryFreq(
+    "funcspec-min-entry-freq", cl::init(450), cl::Hidden, cl::desc(
+    "Do not specialize functions with entry block frequency lower than "
+    "this value"));
+
+static cl::opt<unsigned> MinScore(
+    "funcspec-min-score", cl::init(2), cl::Hidden, cl::desc(
+    "Do not specialize functions with score lower than this value "
+    "(the ratio of specialization bonus over specialization cost)"));
+
 static cl::opt<unsigned> MaxClones(
     "funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
     "The maximum number of clones allowed for a single function "
@@ -88,15 +104,15 @@ static cl::opt<bool> SpecializeOnAddress(
     "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
     "Enable function specialization on the address of global values"));
 
-// Disabled by default as it can significantly increase compilation times.
-//
-// https://llvm-compile-time-tracker.com
-// https://github.com/nikic/llvm-compile-time-tracker
 static cl::opt<bool> SpecializeLiteralConstant(
-    "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
+    "funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc(
     "Enable specialization of functions that take a literal constant as an "
     "argument"));
 
+unsigned FunctionSpecializer::getBlockFreqMultiplier() {
+  return BlockFreqMultiplier;
+}
+
 // Estimates the instruction cost of all the basic blocks in \p WorkList.
 // The successors of such blocks are added to the list as long as they are
 // executable and they have a unique predecessor. \p WorkList represents
@@ -114,7 +130,8 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
   while (!WorkList.empty()) {
     BasicBlock *BB = WorkList.pop_back_val();
 
-    uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
+    uint64_t Weight = BlockFreqMultiplier *
+                      BFI.getBlockFreq(BB).getFrequency() /
                       BFI.getEntryFreq();
     if (!Weight)
       continue;
@@ -167,7 +184,8 @@ Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
 
   KnownConstants.insert({User, C});
 
-  uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
+  uint64_t Weight = BlockFreqMultiplier *
+                    BFI.getBlockFreq(User->getParent()).getFrequency() /
                     BFI.getEntryFreq();
   if (!Weight)
     return 0;
@@ -649,6 +667,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
   if (Args.empty())
     return false;
 
+  bool HasCheckedEntryFreq = false;
   for (User *U : F->users()) {
     if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
       continue;
@@ -684,6 +703,21 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
     if (S.Args.empty())
       continue;
 
+    // Check the function entry frequency only once. We sink this code here to
+    // postpone running the Block Frequency Analysis until we know for sure
+    // there are Specialization candidates, otherwise we are adding unnecessary
+    // overhead.
+    if (!HasCheckedEntryFreq) {
+      // Reject cold functions (for some definition of 'cold').
+      uint64_t EntryFreq = (GetBFI)(*F).getEntryFreq();
+      if (!ForceSpecialization && EntryFreq < MinEntryFreq)
+        return false;
+
+      HasCheckedEntryFreq = true;
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Entry block frequency for "
+                        << F->getName() << " = " << EntryFreq << "\n");
+    }
+
     // Check if we have encountered the same specialisation already.
     if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
       // Existing specialisation. Add the call to the list to rewrite, unless
@@ -698,13 +732,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
       AllSpecs[Index].CallSites.push_back(&CS);
     } else {
       // Calculate the specialisation gain.
-      Cost Score = 0 - SpecCost;
+      Cost Score = 0;
       InstCostVisitor Visitor = getInstCostVisitorFor(F);
       for (ArgInfo &A : S.Args)
         Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+      Score /= SpecCost;
 
       // Discard unprofitable specialisations.
-      if (!ForceSpecialization && Score <= 0)
+      if (!ForceSpecialization && Score < MinScore)
         continue;
 
       // Create a new specialisation entry.

diff  --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
index 5cbfaade98d3c..7c390dadef777 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
+; RUN: opt -S --passes="default<O3>" -force-specialization < %s | FileCheck %s
 
 define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
 entry:

diff  --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
index 003f80fa260ff..ef73ed63b863b 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
@@ -1,11 +1,9 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
 
 ; Test function specialization wouldn't crash due to constant expression.
 ; Note that this test case shows that function specialization pass would
 ; transform the function even if no specialization happened.
 
-; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
-
 %struct = type { i8, i16, i32, i64, i64}
 @Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
 
@@ -26,19 +24,6 @@ entry:
 }
 
 define internal i64 @zoo(i1 %flag) {
-; CHECK-LABEL: @zoo(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
-; CHECK:       plus:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
-; CHECK-NEXT:    br label [[MERGE:%.*]]
-; CHECK:       minus:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 4))
-; CHECK-NEXT:    br label [[MERGE]]
-; CHECK:       merge:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), [[MINUS]] ]
-; CHECK-NEXT:    ret i64 [[TMP2]]
-;
 entry:
   br i1 %flag, label %plus, label %minus
 
@@ -60,10 +45,9 @@ merge:
 
 define i64 @main() {
 ; CHECK-LABEL: @main(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @zoo(i1 false)
-; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @zoo(i1 true)
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @zoo.4(i1 false)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @zoo.3(i1 true)
+; CHECK-NEXT:    ret i64 add (i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 3) to i64))
 ;
   %1 = call i64 @zoo(i1 0)
   %2 = call i64 @zoo(i1 1)
@@ -71,3 +55,29 @@ define i64 @main() {
   ret i64 %3
 }
 
+; CHECK-LABEL: @func2.1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i64 undef
+
+; CHECK-LABEL: @func2.2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i64 undef
+
+; CHECK-LABEL: @zoo.3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[PLUS:%.*]]
+; CHECK:       plus:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
+; CHECK-NEXT:  br label [[MERGE:%.*]]
+; CHECK:       merge:
+; CHECK-NEXT:    ret i64 undef
+
+; CHECK-LABEL: @zoo.4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[MINUS:%.*]]
+; CHECK:       minus:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4))
+; CHECK-NEXT:  br label [[MERGE:%.*]]
+; CHECK:       merge:
+; CHECK-NEXT:    ret i64 undef
+

diff  --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
index 525721f03cfb2..609058764262b 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
 
 ; Checks for callsites that have been annotated with MinSize. We only expect
 ; specialisation for the call that does not have the attribute:

diff  --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
index b5d16f6dab1c0..21be617fd5c3b 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
@@ -1,5 +1,5 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<no-func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s --check-prefix=NOFSPEC
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<no-func-spec>" -force-specialization -S < %s | FileCheck %s --check-prefix=NOFSPEC
 
 define i64 @main(i64 %x, i1 %flag) {
 ;

diff  --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
deleted file mode 100644
index 950ed13f7b9e1..0000000000000
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
-
-; DISABLED-NOT: @func.1(
-; DISABLED-NOT: @func.2(
-
-define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
-  %4 = alloca i32, align 4
-  store i32 %1, ptr %4, align 4
-  %5 = load i32, ptr %4, align 4
-  %6 = icmp slt i32 %5, 1
-  br i1 %6, label %14, label %7
-
-7:                                                ; preds = %3
-  %8 = load i32, ptr %4, align 4
-  %9 = sext i32 %8 to i64
-  %10 = getelementptr inbounds i32, ptr %0, i64 %9
-  call void %2(ptr %10)
-  %11 = load i32, ptr %4, align 4
-  %12 = add nsw i32 %11, -1
-  %13 = call i32 @func(ptr %0, i32 %12, ptr %2)
-  br label %14
-
-14:                                               ; preds = %3, %7
-  ret i32 0
-}
-
-define internal void @increment(ptr nocapture %0) {
-  %2 = load i32, ptr %0, align 4
-  %3 = add nsw i32 %2, 1
-  store i32 %3, ptr %0, align 4
-  ret void
-}
-
-define internal void @decrement(ptr nocapture %0) {
-  %2 = load i32, ptr %0, align 4
-  %3 = add nsw i32 %2, -1
-  store i32 %3, ptr %0, align 4
-  ret void
-}
-
-define i32 @main(ptr %0, i32 %1) {
-; CHECK:    call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
-  %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
-; CHECK:    call void @func.1(ptr [[TMP0]], i32 0)
-  %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
-; CHECK:    ret i32 0
-  ret i32 %4
-}
-
-; CHECK: @func.1(
-; CHECK:    [[TMP3:%.*]] = alloca i32, align 4
-; CHECK:    store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
-; CHECK:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK:    [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
-; CHECK:    br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
-; CHECK:       6:
-; CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; CHECK:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
-; CHECK:    call void @decrement(ptr [[TMP9]])
-; CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
-; CHECK:    call void @func.1(ptr [[TMP0]], i32 [[TMP11]])
-; CHECK:    br label [[TMP12:%.*]]
-; CHECK:       12:
-; CHECK:    ret void
-;
-;
-; CHECK: @func.2(
-; CHECK:    [[TMP3:%.*]] = alloca i32, align 4
-; CHECK:    store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
-; CHECK:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK:    [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
-; CHECK:    br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
-; CHECK:       6:
-; CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; CHECK:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
-; CHECK:    call void @increment(ptr [[TMP9]])
-; CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
-; CHECK:    call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
-; CHECK:    br label [[TMP12:%.*]]
-; CHECK:       12:
-; CHECK:    ret void

diff  --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
index 9b14db5399f3d..84231b1cae6e4 100644
--- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -force-specialization < %s | FileCheck %s
 define dso_local i32 @p0(i32 noundef %x) {
 entry:
   %add = add nsw i32 %x, 1

diff  --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
index 541faa2e19515..d46b73d156894 100644
--- a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization < %s | FileCheck %s
+
 define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
 entry:
   %call = tail call i32 %p(i32 noundef %x)

diff  --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
index c2ba0920c2be3..2cfbf9dd7bdaa 100644
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -6,10 +6,10 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
 ; CHECK:       plus:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 42, ptr @plus, ptr @minus)
 ; CHECK-NEXT:    br label [[MERGE:%.*]]
 ; CHECK:       minus:
-; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y:%.*]], ptr @minus, ptr @plus)
 ; CHECK-NEXT:    br label [[MERGE]]
 ; CHECK:       merge:
 ; CHECK-NEXT:    [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
@@ -20,7 +20,7 @@ entry:
   br i1 %flag, label %plus, label %minus
 
 plus:
-  %cmp0 = call i64 @compute(i64 %x, i64 %y, ptr @plus, ptr @minus)
+  %cmp0 = call i64 @compute(i64 %x, i64 42, ptr @plus, ptr @minus)
   br label %merge
 
 minus:
@@ -68,9 +68,9 @@ entry:
 
 ; CHECK-LABEL: @compute.2
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 42)
+; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 42, ptr @plus, ptr @plus)
 
 ; CHECK-LABEL: @compute.3
 ; CHECK-NEXT:  entry:

diff  --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
index fc400202ab91e..479a841567ad7 100644
--- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
@@ -1,7 +1,8 @@
 ; RUN: opt -S --passes="ipsccp<func-spec>" \
+; RUN:        -funcspec-for-literal-constant=0 \
 ; RUN:        -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
 ; RUN: opt -S --passes="ipsccp<func-spec>" \
-; RUN:        -funcspec-for-literal-constant \
+; RUN:        -funcspec-for-literal-constant=1 \
 ; RUN:        -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
 
 define i32 @f0(i32 noundef %x) {

diff  --git a/llvm/test/Transforms/FunctionSpecialization/max-iters.ll b/llvm/test/Transforms/FunctionSpecialization/max-iters.ll
new file mode 100644
index 0000000000000..76d60949f1ade
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/max-iters.ll
@@ -0,0 +1,110 @@
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=2 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS2
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
+
+; DISABLED-NOT: @func.1(
+; DISABLED-NOT: @func.2(
+; DISABLED-NOT: @func.3(
+
+define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
+  %4 = alloca i32, align 4
+  store i32 %1, ptr %4, align 4
+  %5 = load i32, ptr %4, align 4
+  %6 = icmp slt i32 %5, 1
+  br i1 %6, label %14, label %7
+
+7:                                                ; preds = %3
+  %8 = load i32, ptr %4, align 4
+  %9 = sext i32 %8 to i64
+  %10 = getelementptr inbounds i32, ptr %0, i64 %9
+  call void %2(ptr %10)
+  %11 = load i32, ptr %4, align 4
+  %12 = add nsw i32 %11, -1
+  %13 = call i32 @func(ptr %0, i32 %12, ptr %2)
+  br label %14
+
+14:                                               ; preds = %3, %7
+  ret i32 0
+}
+
+define internal void @increment(ptr nocapture %0) {
+  %2 = load i32, ptr %0, align 4
+  %3 = add nsw i32 %2, 1
+  store i32 %3, ptr %0, align 4
+  ret void
+}
+
+define internal void @decrement(ptr nocapture %0) {
+  %2 = load i32, ptr %0, align 4
+  %3 = add nsw i32 %2, -1
+  store i32 %3, ptr %0, align 4
+  ret void
+}
+
+define i32 @main(ptr %0, i32 %1) {
+; COMMON:      define i32 @main(
+; COMMON-NEXT:    call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; COMMON-NEXT:    call void @func.1(ptr [[TMP0]])
+; COMMON-NEXT:    ret i32 0
+;
+  %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
+  %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
+  ret i32 %4
+}
+
+; COMMON:      define internal void @func.1(
+; COMMON-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
+; COMMON-NEXT:    store i32 0, ptr [[TMP2]], align 4
+; COMMON-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
+; COMMON-NEXT:    br i1 [[TMP4]], label [[TMP11:%.*]], label [[TMP5:%.*]]
+; COMMON:      5:
+; COMMON-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+; COMMON-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP7]]
+; COMMON-NEXT:    call void @decrement(ptr [[TMP8]])
+; COMMON-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
+; ITERS1-NEXT:    call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
+; ITERS2-NEXT:    call void @func.3(ptr [[TMP0]], i32 [[TMP10]])
+; COMMON-NEXT:    br label [[TMP11:%.*]]
+; COMMON:      11:
+; COMMON-NEXT:    ret void
+;
+; COMMON:      define internal void @func.2(
+; COMMON-NEXT:    [[TMP3:%.*]] = alloca i32, align 4
+; COMMON-NEXT:    store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
+; COMMON-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+; COMMON-NEXT:    br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
+; COMMON:      6:
+; COMMON-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; COMMON-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
+; COMMON-NEXT:    call void @increment(ptr [[TMP9]])
+; COMMON-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
+; COMMON-NEXT:    call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
+; COMMON-NEXT:    br label [[TMP12:%.*]]
+; COMMON:      12:
+; COMMON-NEXT:    ret void
+;
+; ITERS2:      define internal void @func.3(
+; ITERS2-NEXT:    [[TMP3:%.*]] = alloca i32, align 4
+; ITERS2-NEXT:    store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
+; ITERS2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+; ITERS2-NEXT:    br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
+; ITERS2:      6:
+; ITERS2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; ITERS2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
+; ITERS2-NEXT:    call void @decrement(ptr [[TMP9]])
+; ITERS2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
+; ITERS2-NEXT:    call void @func.3(ptr [[TMP0]], i32 [[TMP11]])
+; ITERS2-NEXT:    br label [[TMP12:%.*]]
+; ITERS2:      12:
+; ITERS2-NEXT:    ret void
+

diff  --git a/llvm/test/Transforms/FunctionSpecialization/noinline.ll b/llvm/test/Transforms/FunctionSpecialization/noinline.ll
index 863e6e74eb23c..bf66cf374c488 100644
--- a/llvm/test/Transforms/FunctionSpecialization/noinline.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/noinline.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-min-entry-freq=1 < %s | FileCheck %s
 define dso_local i32 @p0(i32 noundef %x) {
 entry:
   %add = add nsw i32 %x, 1

diff  --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
index 4233998ad9f6d..3db1a8ce69a10 100644
--- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
 
 define i64 @main(i64 %x, i1 %flag) {
 entry:

diff  --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
index d1c23e07d5972..73006ae0fcb58 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
@@ -1,20 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE
 
 ; Make sure that we iterate correctly after sorting the specializations:
-; FnSpecialization: Specializations for function compute
-; FnSpecialization:   Gain = 608
-; FnSpecialization:   FormalArg = binop1, ActualArg = power
-; FnSpecialization:   FormalArg = binop2, ActualArg = mul
-; FnSpecialization:   Gain = 982
-; FnSpecialization:   FormalArg = binop1, ActualArg = plus
-; FnSpecialization:   FormalArg = binop2, ActualArg = minus
-; FnSpecialization:   Gain = 795
-; FnSpecialization:   FormalArg = binop1, ActualArg = minus
-; FnSpecialization:   FormalArg = binop2, ActualArg = power
+;
+; Score(@plus, @minus) > Score(42, @minus, @power) > Score(@power, @mul)
 
 define i64 @main(i64 %x, i64 %y, i1 %flag) {
 ; NONE-LABEL: @main(
@@ -116,11 +108,11 @@ merge:
 ;
 ; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
 ; THREE-NEXT:  entry:
-; THREE-NEXT:    [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
-; THREE-NEXT:    [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 42)
+; THREE-NEXT:    [[TMP1:%.+]] = call i64 @power(i64 %x, i64 42)
 ; THREE-NEXT:    [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
 ; THREE-NEXT:    [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
-; THREE-NEXT:    [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT:    [[TMP4:%.+]] = sub i64 [[TMP3]], 42
 ; THREE-NEXT:    [[TMP5:%.+]] = mul i64 [[TMP4]], 2
 ; THREE-NEXT:    ret i64 [[TMP5]]
 ; THREE-NEXT:  }

diff  --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
index 16c9a505e4498..c6516bbe58051 100644
--- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
+++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
@@ -85,7 +85,10 @@ class FunctionSpecializationTest : public testing::Test {
     auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
     auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
 
-    return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
+    uint64_t Weight = FunctionSpecializer::getBlockFreqMultiplier() *
+                      BFI.getBlockFreq(I.getParent()).getFrequency() /
+                      BFI.getEntryFreq();
+    return Weight *
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
   }
 };


        


More information about the llvm-commits mailing list