[llvm] [FuncSpec] Consider literal constants of recursive functions (PR #111162)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 4 07:16:14 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Hari Limaye (hazzlim)
<details>
<summary>Changes</summary>
Enable specialization on literal constant arguments by default for recursive functions, i.e. even when `SpecializeLiteralConstant` is not set.
---
Full diff: https://github.com/llvm/llvm-project/pull/111162.diff
4 Files Affected:
- (modified) llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h (+4-2)
- (modified) llvm/lib/Transforms/IPO/FunctionSpecialization.cpp (+10-6)
- (modified) llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll (+56-16)
- (modified) llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll (+50-8)
``````````diff
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index b001771951e0fe..329491f2a0fe97 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -311,9 +311,11 @@ class FunctionSpecializer {
/// @param FuncSize Cost of specializing a function.
/// @param AllSpecs A vector to add potential specializations to.
/// @param SM A map for a function's specialisation range
+ /// @param ConsiderLiterals Whether to specialize on literal constants
/// @return True, if any potential specializations were found
bool findSpecializations(Function *F, unsigned FuncSize,
- SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
+ SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM,
+ bool ConsiderLiterals);
/// Compute the inlining bonus for replacing argument \p A with constant \p C.
unsigned getInliningBonus(Argument *A, Constant *C);
@@ -328,7 +330,7 @@ class FunctionSpecializer {
/// Determine if it is possible to specialise the function for constant values
/// of the formal parameter \p A.
- bool isArgumentInteresting(Argument *A);
+ bool isArgumentInteresting(Argument *A, bool ConsiderLiterals);
/// Check if the value \p V (an actual argument) is a constant or can only
/// have a constant value. Return that constant.
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 548335d750e33d..db0f06ec8ac17d 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -663,7 +663,8 @@ bool FunctionSpecializer::run() {
if (Inserted && Metrics.isRecursive)
promoteConstantStackValues(&F);
- if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
+ bool ConsiderLiterals = SpecializeLiteralConstant || Metrics.isRecursive;
+ if (!findSpecializations(&F, FuncSize, AllSpecs, SM, ConsiderLiterals)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
@@ -803,7 +804,8 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs,
- SpecMap &SM) {
+ SpecMap &SM,
+ bool ConsiderLiterals) {
// A mapping from a specialisation signature to the index of the respective
// entry in the all specialisation array. Used to ensure uniqueness of
// specialisations.
@@ -812,7 +814,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
// Get a list of interesting arguments.
SmallVector<Argument *> Args;
for (Argument &Arg : F->args())
- if (isArgumentInteresting(&Arg))
+ if (isArgumentInteresting(&Arg, ConsiderLiterals))
Args.push_back(&Arg);
if (Args.empty())
@@ -1032,14 +1034,16 @@ unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
/// Determine if it is possible to specialise the function for constant values
/// of the formal parameter \p A.
-bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
+bool FunctionSpecializer::isArgumentInteresting(Argument *A,
+ bool ConsiderLiterals) {
// No point in specialization if the argument is unused.
if (A->user_empty())
return false;
Type *Ty = A->getType();
- if (!Ty->isPointerTy() && (!SpecializeLiteralConstant ||
- (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
+ if (!Ty->isPointerTy() &&
+ (!ConsiderLiterals ||
+ (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
return false;
// SCCP solver does not record an argument that will be constructed on
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
index ef830a0e9a4a9e..00319f1424d766 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
@@ -64,27 +64,27 @@ define i32 @main(ptr %0, i32 %1) {
; CHECK-LABEL: define i32 @main(
; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
; CHECK-NEXT: call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 0)
+; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]])
; CHECK-NEXT: ret i32 0
;
;
; CHECK-LABEL: define internal void @func.specialized.1(
-; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
+; CHECK-NEXT: store i32 0, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
-; CHECK-NEXT: br i1 [[TMP4]], label %[[BB12:.*]], label %[[BB6:.*]]
-; CHECK: [[BB6]]:
+; CHECK-NEXT: br i1 [[TMP4]], label %[[BB11:.*]], label %[[BB5:.*]]
+; CHECK: [[BB5]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP7]]
; CHECK-NEXT: call void @decrement(ptr [[TMP8]])
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
-; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP10]])
-; CHECK-NEXT: br label %[[BB12]]
-; CHECK: [[BB12]]:
+; CHECK-NEXT: call void @func.specialized.3(ptr [[TMP0]], i32 [[TMP10]])
+; CHECK-NEXT: br label %[[BB11]]
+; CHECK: [[BB11]]:
; CHECK-NEXT: ret void
;
;
@@ -108,6 +108,46 @@ define i32 @main(ptr %0, i32 %1) {
; CHECK-NEXT: ret void
;
;
+; CHECK-LABEL: define internal void @func.specialized.3(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+; CHECK-NEXT: br i1 [[TMP5]], label %[[BB12:.*]], label %[[BB6:.*]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP8]]
+; CHECK-NEXT: call void @decrement(ptr [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
+; CHECK-NEXT: call void @func.specialized.3(ptr [[TMP0]], i32 [[TMP11]])
+; CHECK-NEXT: br label %[[BB12]]
+; CHECK: [[BB12]]:
+; CHECK-NEXT: ret void
+;
+;
+; ONE-ITER-LABEL: define internal void @func(
+; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], ptr nocapture [[TMP2:%.*]]) {
+; ONE-ITER-NEXT: [[TMP4:%.*]] = alloca i32, align 4
+; ONE-ITER-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4
+; ONE-ITER-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; ONE-ITER-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1
+; ONE-ITER-NEXT: br i1 [[TMP6]], label %[[BB13:.*]], label %[[BB7:.*]]
+; ONE-ITER: [[BB7]]:
+; ONE-ITER-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
+; ONE-ITER-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
+; ONE-ITER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP9]]
+; ONE-ITER-NEXT: call void [[TMP2]](ptr [[TMP10]])
+; ONE-ITER-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4
+; ONE-ITER-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], -1
+; ONE-ITER-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP12]], ptr [[TMP2]])
+; ONE-ITER-NEXT: br label %[[BB13]]
+; ONE-ITER: [[BB13]]:
+; ONE-ITER-NEXT: ret void
+;
+;
; ONE-ITER-LABEL: define internal void @increment(
; ONE-ITER-SAME: ptr nocapture [[TMP0:%.*]]) {
; ONE-ITER-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -127,27 +167,27 @@ define i32 @main(ptr %0, i32 %1) {
; ONE-ITER-LABEL: define i32 @main(
; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
; ONE-ITER-NEXT: call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP1]])
-; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 0)
+; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]])
; ONE-ITER-NEXT: ret i32 0
;
;
; ONE-ITER-LABEL: define internal void @func.specialized.1(
-; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ONE-ITER-SAME: ptr [[TMP0:%.*]]) {
; ONE-ITER-NEXT: [[TMP2:%.*]] = alloca i32, align 4
-; ONE-ITER-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
+; ONE-ITER-NEXT: store i32 0, ptr [[TMP2]], align 4
; ONE-ITER-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; ONE-ITER-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
-; ONE-ITER-NEXT: br i1 [[TMP4]], label %[[BB12:.*]], label %[[BB6:.*]]
-; ONE-ITER: [[BB6]]:
+; ONE-ITER-NEXT: br i1 [[TMP4]], label %[[BB11:.*]], label %[[BB5:.*]]
+; ONE-ITER: [[BB5]]:
; ONE-ITER-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
; ONE-ITER-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
; ONE-ITER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP7]]
; ONE-ITER-NEXT: call void @decrement(ptr [[TMP8]])
; ONE-ITER-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
; ONE-ITER-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
-; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP10]])
-; ONE-ITER-NEXT: br label %[[BB12]]
-; ONE-ITER: [[BB12]]:
+; ONE-ITER-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
+; ONE-ITER-NEXT: br label %[[BB11]]
+; ONE-ITER: [[BB11]]:
; ONE-ITER-NEXT: ret void
;
;
diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
index 930ed6627f7f1e..e7e2fffac5d6ec 100644
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -51,7 +51,7 @@ entry:
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[FLAG]], label %[[PLUS:.*]], label %[[MINUS:.*]]
; CHECK: [[PLUS]]:
-; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.specialized.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
+; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
; CHECK-NEXT: br label %[[MERGE:.*]]
; CHECK: [[MINUS]]:
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.specialized.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
@@ -79,10 +79,10 @@ entry:
; CHECK-LABEL: define internal i64 @compute.specialized.1(
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[OP1:%.*]] = call i64 [[BINOP1]](i64 [[X]], i64 [[Y]])
; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr [[BINOP1]], ptr @plus)
-; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP1]], [[OP0]]
+; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.5(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
@@ -93,13 +93,13 @@ entry:
; CHECK-LABEL: define internal i64 @compute.specialized.2(
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 42)
+; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
+; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.4(i64 [[X]], i64 42, ptr @plus, ptr @plus)
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
-; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], 42
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
; CHECK-NEXT: ret i64 [[MUL]]
;
@@ -117,3 +117,45 @@ entry:
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
; CHECK-NEXT: ret i64 [[MUL]]
;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.4(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 42)
+; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 42)
+; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.4(i64 [[X]], i64 42, ptr @plus, ptr @plus)
+; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
+; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], 42
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
+; CHECK-NEXT: ret i64 [[MUL]]
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.5(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.5(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
+; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
+; CHECK-NEXT: ret i64 [[MUL]]
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.6(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[OP0:%.*]] = call i64 [[BINOP1]](i64 [[X]], i64 [[Y]])
+; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.6(i64 [[X]], i64 [[Y]], ptr [[BINOP1]], ptr @plus)
+; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
+; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
+; CHECK-NEXT: ret i64 [[MUL]]
+;
``````````
</details>
https://github.com/llvm/llvm-project/pull/111162
More information about the llvm-commits mailing list