[llvm] 0524534 - [FuncSpec] Enable specialization of literal constants.
Alexandros Lamprineas via llvm-commits
llvm-commits at lists.llvm.org
Thu May 25 02:05:26 PDT 2023
Author: Alexandros Lamprineas
Date: 2023-05-25T09:55:46+01:00
New Revision: 0524534d5220da5ecb2cd424a46520184d2be366
URL: https://github.com/llvm/llvm-project/commit/0524534d5220da5ecb2cd424a46520184d2be366
DIFF: https://github.com/llvm/llvm-project/commit/0524534d5220da5ecb2cd424a46520184d2be366.diff
LOG: [FuncSpec] Enable specialization of literal constants.
To do so we have to tweak the cost model such that specialization
does not trigger excessively.
Differential Revision: https://reviews.llvm.org/D150649
Added:
llvm/test/Transforms/FunctionSpecialization/max-iters.ll
Modified:
llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
llvm/test/Transforms/FunctionSpecialization/global-rank.ll
llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
llvm/test/Transforms/FunctionSpecialization/literal-const.ll
llvm/test/Transforms/FunctionSpecialization/noinline.ll
llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
Removed:
llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 349d5a7a08795..e9ddff01f728c 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -188,6 +188,8 @@ class FunctionSpecializer {
bool run();
+ static unsigned getBlockFreqMultiplier();
+
InstCostVisitor getInstCostVisitorFor(Function *F) {
auto &BFI = (GetBFI)(*F);
auto &TTI = (GetTTI)(*F);
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index a970253d9b1c8..a635d7b4d40aa 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -74,6 +74,22 @@ static cl::opt<bool> ForceSpecialization(
"Force function specialization for every call site with a constant "
"argument"));
+// Set to 2^3 to model three levels of if-else nest.
+static cl::opt<unsigned> BlockFreqMultiplier(
+ "funcspec-block-freq-multiplier", cl::init(8), cl::Hidden, cl::desc(
+ "Multiplier to scale block frequency of user instructions during "
+ "specialization bonus estimation"));
+
+static cl::opt<unsigned> MinEntryFreq(
+ "funcspec-min-entry-freq", cl::init(450), cl::Hidden, cl::desc(
+ "Do not specialize functions with entry block frequency lower than "
+ "this value"));
+
+static cl::opt<unsigned> MinScore(
+ "funcspec-min-score", cl::init(2), cl::Hidden, cl::desc(
+ "Do not specialize functions with score lower than this value "
+ "(the ratio of specialization bonus over specialization cost)"));
+
static cl::opt<unsigned> MaxClones(
"funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
"The maximum number of clones allowed for a single function "
@@ -88,15 +104,15 @@ static cl::opt<bool> SpecializeOnAddress(
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
"Enable function specialization on the address of global values"));
-// Disabled by default as it can significantly increase compilation times.
-//
-// https://llvm-compile-time-tracker.com
-// https://github.com/nikic/llvm-compile-time-tracker
static cl::opt<bool> SpecializeLiteralConstant(
- "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
+ "funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc(
"Enable specialization of functions that take a literal constant as an "
"argument"));
+unsigned FunctionSpecializer::getBlockFreqMultiplier() {
+ return BlockFreqMultiplier;
+}
+
// Estimates the instruction cost of all the basic blocks in \p WorkList.
// The successors of such blocks are added to the list as long as they are
// executable and they have a unique predecessor. \p WorkList represents
@@ -114,7 +130,8 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
- uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
+ uint64_t Weight = BlockFreqMultiplier *
+ BFI.getBlockFreq(BB).getFrequency() /
BFI.getEntryFreq();
if (!Weight)
continue;
@@ -167,7 +184,8 @@ Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
KnownConstants.insert({User, C});
- uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
+ uint64_t Weight = BlockFreqMultiplier *
+ BFI.getBlockFreq(User->getParent()).getFrequency() /
BFI.getEntryFreq();
if (!Weight)
return 0;
@@ -649,6 +667,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
if (Args.empty())
return false;
+ bool HasCheckedEntryFreq = false;
for (User *U : F->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
@@ -684,6 +703,21 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
if (S.Args.empty())
continue;
+ // Check the function entry frequency only once. We sink this code here to
+ // postpone running the Block Frequency Analysis until we know for sure
+ // there are Specialization candidates, otherwise we are adding unnecessary
+ // overhead.
+ if (!HasCheckedEntryFreq) {
+ // Reject cold functions (for some definition of 'cold').
+ uint64_t EntryFreq = (GetBFI)(*F).getEntryFreq();
+ if (!ForceSpecialization && EntryFreq < MinEntryFreq)
+ return false;
+
+ HasCheckedEntryFreq = true;
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Entry block frequency for "
+ << F->getName() << " = " << EntryFreq << "\n");
+ }
+
// Check if we have encountered the same specialisation already.
if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
// Existing specialisation. Add the call to the list to rewrite, unless
@@ -698,13 +732,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
AllSpecs[Index].CallSites.push_back(&CS);
} else {
// Calculate the specialisation gain.
- Cost Score = 0 - SpecCost;
+ Cost Score = 0;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
for (ArgInfo &A : S.Args)
Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+ Score /= SpecCost;
// Discard unprofitable specialisations.
- if (!ForceSpecialization && Score <= 0)
+ if (!ForceSpecialization && Score < MinScore)
continue;
// Create a new specialisation entry.
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
index 5cbfaade98d3c..7c390dadef777 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
+; RUN: opt -S --passes="default<O3>" -force-specialization < %s | FileCheck %s
define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
index 003f80fa260ff..ef73ed63b863b 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
@@ -1,11 +1,9 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; Test function specialization wouldn't crash due to constant expression.
; Note that this test case shows that function specialization pass would
; transform the function even if no specialization happened.
-; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
-
%struct = type { i8, i16, i32, i64, i64}
@Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
@@ -26,19 +24,6 @@ entry:
}
define internal i64 @zoo(i1 %flag) {
-; CHECK-LABEL: @zoo(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
-; CHECK: plus:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
-; CHECK-NEXT: br label [[MERGE:%.*]]
-; CHECK: minus:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 4))
-; CHECK-NEXT: br label [[MERGE]]
-; CHECK: merge:
-; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), [[MINUS]] ]
-; CHECK-NEXT: ret i64 [[TMP2]]
-;
entry:
br i1 %flag, label %plus, label %minus
@@ -60,10 +45,9 @@ merge:
define i64 @main() {
; CHECK-LABEL: @main(
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo(i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo(i1 true)
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: ret i64 [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo.4(i1 false)
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo.3(i1 true)
+; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 3) to i64))
;
%1 = call i64 @zoo(i1 0)
%2 = call i64 @zoo(i1 1)
@@ -71,3 +55,29 @@ define i64 @main() {
ret i64 %3
}
+; CHECK-LABEL: @func2.1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i64 undef
+
+; CHECK-LABEL: @func2.2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i64 undef
+
+; CHECK-LABEL: @zoo.3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[PLUS:%.*]]
+; CHECK: plus:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: merge:
+; CHECK-NEXT: ret i64 undef
+
+; CHECK-LABEL: @zoo.4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[MINUS:%.*]]
+; CHECK: minus:
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4))
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: merge:
+; CHECK-NEXT: ret i64 undef
+
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
index 525721f03cfb2..609058764262b 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; Checks for callsites that have been annotated with MinSize. We only expect
; specialisation for the call that does not have the attribute:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
index b5d16f6dab1c0..21be617fd5c3b 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
@@ -1,5 +1,5 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<no-func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s --check-prefix=NOFSPEC
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<no-func-spec>" -force-specialization -S < %s | FileCheck %s --check-prefix=NOFSPEC
define i64 @main(i64 %x, i1 %flag) {
;
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
deleted file mode 100644
index 950ed13f7b9e1..0000000000000
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
-
-; DISABLED-NOT: @func.1(
-; DISABLED-NOT: @func.2(
-
-define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
- %4 = alloca i32, align 4
- store i32 %1, ptr %4, align 4
- %5 = load i32, ptr %4, align 4
- %6 = icmp slt i32 %5, 1
- br i1 %6, label %14, label %7
-
-7: ; preds = %3
- %8 = load i32, ptr %4, align 4
- %9 = sext i32 %8 to i64
- %10 = getelementptr inbounds i32, ptr %0, i64 %9
- call void %2(ptr %10)
- %11 = load i32, ptr %4, align 4
- %12 = add nsw i32 %11, -1
- %13 = call i32 @func(ptr %0, i32 %12, ptr %2)
- br label %14
-
-14: ; preds = %3, %7
- ret i32 0
-}
-
-define internal void @increment(ptr nocapture %0) {
- %2 = load i32, ptr %0, align 4
- %3 = add nsw i32 %2, 1
- store i32 %3, ptr %0, align 4
- ret void
-}
-
-define internal void @decrement(ptr nocapture %0) {
- %2 = load i32, ptr %0, align 4
- %3 = add nsw i32 %2, -1
- store i32 %3, ptr %0, align 4
- ret void
-}
-
-define i32 @main(ptr %0, i32 %1) {
-; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
- %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
-; CHECK: call void @func.1(ptr [[TMP0]], i32 0)
- %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
-; CHECK: ret i32 0
- ret i32 %4
-}
-
-; CHECK: @func.1(
-; CHECK: [[TMP3:%.*]] = alloca i32, align 4
-; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
-; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
-; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
-; CHECK: 6:
-; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
-; CHECK: call void @decrement(ptr [[TMP9]])
-; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
-; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]])
-; CHECK: br label [[TMP12:%.*]]
-; CHECK: 12:
-; CHECK: ret void
-;
-;
-; CHECK: @func.2(
-; CHECK: [[TMP3:%.*]] = alloca i32, align 4
-; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
-; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
-; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
-; CHECK: 6:
-; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
-; CHECK: call void @increment(ptr [[TMP9]])
-; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
-; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
-; CHECK: br label [[TMP12:%.*]]
-; CHECK: 12:
-; CHECK: ret void
diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
index 9b14db5399f3d..84231b1cae6e4 100644
--- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -force-specialization < %s | FileCheck %s
define dso_local i32 @p0(i32 noundef %x) {
entry:
%add = add nsw i32 %x, 1
diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
index 541faa2e19515..d46b73d156894 100644
--- a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization < %s | FileCheck %s
+
define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
entry:
%call = tail call i32 %p(i32 noundef %x)
diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
index c2ba0920c2be3..2cfbf9dd7bdaa 100644
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -6,10 +6,10 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
; CHECK: plus:
-; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
+; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 42, ptr @plus, ptr @minus)
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: minus:
-; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y:%.*]], ptr @minus, ptr @plus)
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
@@ -20,7 +20,7 @@ entry:
br i1 %flag, label %plus, label %minus
plus:
- %cmp0 = call i64 @compute(i64 %x, i64 %y, ptr @plus, ptr @minus)
+ %cmp0 = call i64 @compute(i64 %x, i64 42, ptr @plus, ptr @minus)
br label %merge
minus:
@@ -68,9 +68,9 @@ entry:
; CHECK-LABEL: @compute.2
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 42)
+; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
+; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 42, ptr @plus, ptr @plus)
; CHECK-LABEL: @compute.3
; CHECK-NEXT: entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
index fc400202ab91e..479a841567ad7 100644
--- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
@@ -1,7 +1,8 @@
; RUN: opt -S --passes="ipsccp<func-spec>" \
+; RUN: -funcspec-for-literal-constant=0 \
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
; RUN: opt -S --passes="ipsccp<func-spec>" \
-; RUN: -funcspec-for-literal-constant \
+; RUN: -funcspec-for-literal-constant=1 \
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
define i32 @f0(i32 noundef %x) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/max-iters.ll b/llvm/test/Transforms/FunctionSpecialization/max-iters.ll
new file mode 100644
index 0000000000000..76d60949f1ade
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/max-iters.ll
@@ -0,0 +1,110 @@
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=2 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS2
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
+
+; DISABLED-NOT: @func.1(
+; DISABLED-NOT: @func.2(
+; DISABLED-NOT: @func.3(
+
+define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
+ %4 = alloca i32, align 4
+ store i32 %1, ptr %4, align 4
+ %5 = load i32, ptr %4, align 4
+ %6 = icmp slt i32 %5, 1
+ br i1 %6, label %14, label %7
+
+7: ; preds = %3
+ %8 = load i32, ptr %4, align 4
+ %9 = sext i32 %8 to i64
+ %10 = getelementptr inbounds i32, ptr %0, i64 %9
+ call void %2(ptr %10)
+ %11 = load i32, ptr %4, align 4
+ %12 = add nsw i32 %11, -1
+ %13 = call i32 @func(ptr %0, i32 %12, ptr %2)
+ br label %14
+
+14: ; preds = %3, %7
+ ret i32 0
+}
+
+define internal void @increment(ptr nocapture %0) {
+ %2 = load i32, ptr %0, align 4
+ %3 = add nsw i32 %2, 1
+ store i32 %3, ptr %0, align 4
+ ret void
+}
+
+define internal void @decrement(ptr nocapture %0) {
+ %2 = load i32, ptr %0, align 4
+ %3 = add nsw i32 %2, -1
+ store i32 %3, ptr %0, align 4
+ ret void
+}
+
+define i32 @main(ptr %0, i32 %1) {
+; COMMON: define i32 @main(
+; COMMON-NEXT: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; COMMON-NEXT: call void @func.1(ptr [[TMP0]])
+; COMMON-NEXT: ret i32 0
+;
+ %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
+ %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
+ ret i32 %4
+}
+
+; COMMON: define internal void @func.1(
+; COMMON-NEXT: [[TMP2:%.*]] = alloca i32, align 4
+; COMMON-NEXT: store i32 0, ptr [[TMP2]], align 4
+; COMMON-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
+; COMMON-NEXT: br i1 [[TMP4]], label [[TMP11:%.*]], label [[TMP5:%.*]]
+; COMMON: 5:
+; COMMON-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+; COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP7]]
+; COMMON-NEXT: call void @decrement(ptr [[TMP8]])
+; COMMON-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
+; ITERS1-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
+; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP10]])
+; COMMON-NEXT: br label [[TMP11:%.*]]
+; COMMON: 11:
+; COMMON-NEXT: ret void
+;
+; COMMON: define internal void @func.2(
+; COMMON-NEXT: [[TMP3:%.*]] = alloca i32, align 4
+; COMMON-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
+; COMMON-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+; COMMON-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
+; COMMON: 6:
+; COMMON-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; COMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
+; COMMON-NEXT: call void @increment(ptr [[TMP9]])
+; COMMON-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
+; COMMON-NEXT: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
+; COMMON-NEXT: br label [[TMP12:%.*]]
+; COMMON: 12:
+; COMMON-NEXT: ret void
+;
+; ITERS2: define internal void @func.3(
+; ITERS2-NEXT: [[TMP3:%.*]] = alloca i32, align 4
+; ITERS2-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
+; ITERS2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+; ITERS2-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
+; ITERS2: 6:
+; ITERS2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; ITERS2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
+; ITERS2-NEXT: call void @decrement(ptr [[TMP9]])
+; ITERS2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
+; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP11]])
+; ITERS2-NEXT: br label [[TMP12:%.*]]
+; ITERS2: 12:
+; ITERS2-NEXT: ret void
+
diff --git a/llvm/test/Transforms/FunctionSpecialization/noinline.ll b/llvm/test/Transforms/FunctionSpecialization/noinline.ll
index 863e6e74eb23c..bf66cf374c488 100644
--- a/llvm/test/Transforms/FunctionSpecialization/noinline.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/noinline.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-min-entry-freq=1 < %s | FileCheck %s
define dso_local i32 @p0(i32 noundef %x) {
entry:
%add = add nsw i32 %x, 1
diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
index 4233998ad9f6d..3db1a8ce69a10 100644
--- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
define i64 @main(i64 %x, i1 %flag) {
entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
index d1c23e07d5972..73006ae0fcb58 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
@@ -1,20 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE
; Make sure that we iterate correctly after sorting the specializations:
-; FnSpecialization: Specializations for function compute
-; FnSpecialization: Gain = 608
-; FnSpecialization: FormalArg = binop1, ActualArg = power
-; FnSpecialization: FormalArg = binop2, ActualArg = mul
-; FnSpecialization: Gain = 982
-; FnSpecialization: FormalArg = binop1, ActualArg = plus
-; FnSpecialization: FormalArg = binop2, ActualArg = minus
-; FnSpecialization: Gain = 795
-; FnSpecialization: FormalArg = binop1, ActualArg = minus
-; FnSpecialization: FormalArg = binop2, ActualArg = power
+;
+; Score(@plus, @minus) > Score(42, @minus, @power) > Score(@power, @mul)
define i64 @main(i64 %x, i64 %y, i1 %flag) {
; NONE-LABEL: @main(
@@ -116,11 +108,11 @@ merge:
;
; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
; THREE-NEXT: entry:
-; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
-; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 42)
+; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 42)
; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
-; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], 42
; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
; THREE-NEXT: ret i64 [[TMP5]]
; THREE-NEXT: }
diff --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
index 16c9a505e4498..c6516bbe58051 100644
--- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
+++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
@@ -85,7 +85,10 @@ class FunctionSpecializationTest : public testing::Test {
auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
- return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
+ uint64_t Weight = FunctionSpecializer::getBlockFreqMultiplier() *
+ BFI.getBlockFreq(I.getParent()).getFrequency() /
+ BFI.getEntryFreq();
+ return Weight *
TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
}
};
More information about the llvm-commits mailing list