[llvm] [FuncSpec] Enable SpecializeLiteralConstant by default (PR #113442)

Hari Limaye via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 28 03:23:18 PDT 2024


https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/113442

>From 8a24db8f82b36bab49f68b1bb9f584b7d0cb71a7 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 22 Oct 2024 23:17:39 +0000
Subject: [PATCH 1/6] [FuncSpec] Enable SpecializeLiteralConstant by default

Enable specialization on literal constant arguments by default in
Function Specialization.
---
 .../Transforms/IPO/FunctionSpecialization.cpp  | 18 +++++++-----------
 .../Generic/ipsccp-remap-assign-id.ll          |  4 ++--
 .../compiler-crash-58759.ll                    |  2 +-
 ...ction-specialization-constant-expression.ll |  2 +-
 .../function-specialization2.ll                |  6 +++---
 .../function-specialization4.ll                |  6 ++++--
 .../get-possible-constants.ll                  |  2 +-
 .../FunctionSpecialization/global-rank.ll      |  2 +-
 .../identical-specializations.ll               |  2 +-
 .../FunctionSpecialization/literal-const.ll    |  3 ++-
 .../specialize-multiple-arguments.ll           |  8 ++++----
 11 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 20249a20a37e41..2b0388f339fabc 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -84,14 +84,11 @@ static cl::opt<bool> SpecializeOnAddress(
     "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
     "Enable function specialization on the address of global values"));
 
-// Disabled by default as it can significantly increase compilation times.
-//
-// https://llvm-compile-time-tracker.com
-// https://github.com/nikic/llvm-compile-time-tracker
 static cl::opt<bool> SpecializeLiteralConstant(
-    "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
-    "Enable specialization of functions that take a literal constant as an "
-    "argument"));
+    "funcspec-for-literal-constant", cl::init(true), cl::Hidden,
+    cl::desc(
+        "Enable specialization of functions that take a literal constant as an "
+        "argument"));
 
 bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
                                          DenseSet<BasicBlock *> &DeadBlocks) {
@@ -682,10 +679,9 @@ bool FunctionSpecializer::run() {
         (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
       continue;
 
-    // TODO: For now only consider recursive functions when running multiple
-    // times. This should change if specialization on literal constants gets
-    // enabled.
-    if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
+    // When specialization on literal constants is disabled, only consider
+    // recursive functions when running multiple times.
+    if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
       continue;
 
     int64_t Sz = *Metrics.NumInsts.getValue();
diff --git a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll
index 0e8f92cacf66d7..42560fc3958d1b 100644
--- a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll
+++ b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll
@@ -1,5 +1,5 @@
-; RUN: opt -passes=ipsccp %s -S -o - | FileCheck %s
-; RUN: opt --try-experimental-debuginfo-iterators -passes=ipsccp %s -S -o - | FileCheck %s
+; RUN: opt -passes=ipsccp -funcspec-for-literal-constant=false %s -S -o - | FileCheck %s
+; RUN: opt --try-experimental-debuginfo-iterators -passes=ipsccp -funcspec-for-literal-constant=false %s -S -o - | FileCheck %s
 
 ;; Check the dbg.assign DIAssignID operand gets remapped after cloning.
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
index f29cf0d123939a..7291d83b816115 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
+; RUN: opt -S --passes="default<O3>" -funcspec-for-literal-constant=false < %s | FileCheck %s
 
 define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
 entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
index 16a46851163129..0c24169d02c2c5 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
@@ -4,7 +4,7 @@
 ; Note that this test case shows that function specialization pass would
 ; transform the function even if no specialization happened.
 
-; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -funcspec-for-literal-constant=false -S < %s | FileCheck %s
 
 %struct = type { i8, i16, i32, i64, i64}
 @Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
index ef830a0e9a4a9e..6f36a394979d81 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE-ITER
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-for-literal-constant=false -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-for-literal-constant=false -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE-ITER
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-for-literal-constant=false -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
 
 
 define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
index 4e5a196d668291..a6a990c3415936 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
@@ -1,8 +1,10 @@
 ; RUN: opt -passes="ipsccp<func-spec>" -force-specialization \
-; RUN:   -funcspec-max-clones=2 -S < %s | FileCheck %s
+; RUN:   -funcspec-for-literal-constant=false -funcspec-max-clones=2 \
+; RUN:   -S < %s | FileCheck %s
 
 ; RUN: opt -passes="ipsccp<func-spec>" -force-specialization \
-; RUN:   -funcspec-max-clones=1 -S < %s | FileCheck %s --check-prefix=CONST1
+; RUN:   -funcspec-for-literal-constant=false -funcspec-max-clones=1 \
+; RUN:   -S < %s | FileCheck %s --check-prefix=CONST1
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
index dfa1e5a42776a5..2f42125d8cf979 100644
--- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false < %s | FileCheck %s
 define dso_local i32 @p0(i32 noundef %x) {
 entry:
   %add = add nsw i32 %x, 1
diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
index 1926e29ddee013..06185332f22e0c 100644
--- a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=1 < %s | FileCheck %s
 define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
 entry:
   %call = tail call i32 %p(i32 noundef %x)
diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
index 930ed6627f7f1e..97d77971a92d3b 100644
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
-; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -funcspec-for-literal-constant=false -S < %s | FileCheck %s
 
 define i64 @main(i64 %x, i64 %y, i1 %flag) {
 entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
index 3eae3dc261fb2a..d7adbec558cca3 100644
--- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
@@ -1,7 +1,8 @@
 ; RUN: opt -S --passes="ipsccp<func-spec>" \
+; RUN:        -funcspec-for-literal-constant=false \
 ; RUN:        -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
 ; RUN: opt -S --passes="ipsccp<func-spec>" \
-; RUN:        -funcspec-for-literal-constant \
+; RUN:        -funcspec-for-literal-constant=true \
 ; RUN:        -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
 
 define i32 @f0(i32 noundef %x) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
index a653760abb2cc6..73291600edb85d 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
 
 ; Make sure that we iterate correctly after sorting the specializations:
 ; FnSpecialization: Specializations for function compute

>From 5e2d8b6deec02c3628431f7477b293b293aab158 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 23 Oct 2024 11:38:10 +0000
Subject: [PATCH 2/6] Remove redundant funcspec-for-literal-constant=true in
 lit test

---
 llvm/test/Transforms/FunctionSpecialization/literal-const.ll | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
index d7adbec558cca3..7d5e506064af77 100644
--- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
@@ -2,7 +2,6 @@
 ; RUN:        -funcspec-for-literal-constant=false \
 ; RUN:        -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
 ; RUN: opt -S --passes="ipsccp<func-spec>" \
-; RUN:        -funcspec-for-literal-constant=true \
 ; RUN:        -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
 
 define i32 @f0(i32 noundef %x) {

>From eb991bb8213c889269cea3debcac0d7e2d994e12 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 24 Oct 2024 15:28:54 +0000
Subject: [PATCH 3/6] Address review comments 1

- Improve comment regarding multiple iterations
---
 llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 2b0388f339fabc..382d3af3d0b2c5 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -680,7 +680,9 @@ bool FunctionSpecializer::run() {
       continue;
 
     // When specialization on literal constants is disabled, only consider
-    // recursive functions when running multiple times.
+    // recursive functions when running multiple times to save wasted analysis,
+    // as we will not be able to specialize on any newly found literal constant
+    // return values.
     if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
       continue;
 

>From 4533583136a11f260b391c0bf9be2aeb72b389fc Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 24 Oct 2024 16:33:32 +0000
Subject: [PATCH 4/6] Add regression test for pointer return value
 specialization

- Add regression test which motivates removing the conditional check to
  only consider recursive functions on successive iterations when
  SpecializeLiteralConstant=false
---
 ...unction-specialization-track-ptr-return.ll | 147 ++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 llvm/test/Transforms/FunctionSpecialization/function-specialization-track-ptr-return.ll

diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-track-ptr-return.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-track-ptr-return.ll
new file mode 100644
index 00000000000000..4ee5bbeb5d36e3
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-track-ptr-return.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization \
+; RUN:     -funcspec-max-iters=3 -S < %s | FileCheck %s
+
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization          \
+; RUN:     -funcspec-for-literal-constant=false -funcspec-max-iters=3 \
+; RUN:     -S < %s | FileCheck %s --check-prefix=NOLIT
+
+@global_true = constant i1 true
+@global_false = constant i1 false
+
+define i64 @main() {
+entry:
+  %binop1 = call ptr @select_binop(ptr @global_true)
+  %binop2 = call ptr @select_binop(ptr @global_false)
+
+  %c1 = call i64 @compute(ptr %binop1)
+  %c2 = call i64 @compute(ptr %binop2)
+  %add = add i64 %c1, %c2
+  ret i64 %add
+}
+
+define ptr @select_binop(ptr %flag) {
+  %flag.val = load i1, ptr %flag
+  %binop = select i1 %flag.val, ptr @plus, ptr @minus
+  ret ptr %binop
+}
+
+define internal i64 @compute(ptr %binop) {
+entry:
+  %res = call i64 %binop(i64 1, i64 1)
+  ret i64 %res
+}
+
+define internal i64 @plus(i64 %x) {
+entry:
+  %sum = add i64 %x, 1
+  ret i64 %sum
+}
+
+define internal i64 @minus(i64 %x) {
+entry:
+  %diff = sub i64 %x, 1
+  ret i64 %diff
+}
+; CHECK-LABEL: define i64 @main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[BINOP1:%.*]] = call ptr @select_binop.specialized.1(ptr @global_true)
+; CHECK-NEXT:    [[BINOP2:%.*]] = call ptr @select_binop.specialized.2(ptr @global_false)
+; CHECK-NEXT:    [[C1:%.*]] = call i64 @compute.specialized.3(ptr @plus)
+; CHECK-NEXT:    [[C2:%.*]] = call i64 @compute.specialized.4(ptr @minus)
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[C1]], [[C2]]
+; CHECK-NEXT:    ret i64 [[ADD]]
+;
+;
+; CHECK-LABEL: define ptr @select_binop(
+; CHECK-SAME: ptr [[FLAG:%.*]]) {
+; CHECK-NEXT:    [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1
+; CHECK-NEXT:    [[BINOP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus
+; CHECK-NEXT:    ret ptr [[BINOP]]
+;
+;
+; CHECK-LABEL: define internal i64 @plus(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SUM:%.*]] = add i64 [[X]], 1
+; CHECK-NEXT:    ret i64 [[SUM]]
+;
+;
+; CHECK-LABEL: define internal i64 @minus(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub i64 [[X]], 1
+; CHECK-NEXT:    ret i64 [[DIFF]]
+;
+;
+; CHECK-LABEL: define internal ptr @select_binop.specialized.1(
+; CHECK-SAME: ptr [[FLAG:%.*]]) {
+; CHECK-NEXT:    ret ptr poison
+;
+;
+; CHECK-LABEL: define internal ptr @select_binop.specialized.2(
+; CHECK-SAME: ptr [[FLAG:%.*]]) {
+; CHECK-NEXT:    ret ptr poison
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.3(
+; CHECK-SAME: ptr [[BINOP:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @plus(i64 1, i64 1)
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.4(
+; CHECK-SAME: ptr [[BINOP:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @minus(i64 1, i64 1)
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+;
+; NOLIT-LABEL: define i64 @main() {
+; NOLIT-NEXT:  [[ENTRY:.*:]]
+; NOLIT-NEXT:    [[BINOP1:%.*]] = call ptr @select_binop.specialized.1(ptr @global_true)
+; NOLIT-NEXT:    [[BINOP2:%.*]] = call ptr @select_binop.specialized.2(ptr @global_false)
+; NOLIT-NEXT:    [[C1:%.*]] = call i64 @compute(ptr @plus)
+; NOLIT-NEXT:    [[C2:%.*]] = call i64 @compute(ptr @minus)
+; NOLIT-NEXT:    [[ADD:%.*]] = add i64 [[C1]], [[C2]]
+; NOLIT-NEXT:    ret i64 [[ADD]]
+;
+;
+; NOLIT-LABEL: define ptr @select_binop(
+; NOLIT-SAME: ptr [[FLAG:%.*]]) {
+; NOLIT-NEXT:    [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1
+; NOLIT-NEXT:    [[BINOP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus
+; NOLIT-NEXT:    ret ptr [[BINOP]]
+;
+;
+; NOLIT-LABEL: define internal i64 @compute(
+; NOLIT-SAME: ptr [[BINOP:%.*]]) {
+; NOLIT-NEXT:  [[ENTRY:.*:]]
+; NOLIT-NEXT:    [[RES:%.*]] = call i64 [[BINOP]](i64 1, i64 1)
+; NOLIT-NEXT:    ret i64 [[RES]]
+;
+;
+; NOLIT-LABEL: define internal i64 @plus(
+; NOLIT-SAME: i64 [[X:%.*]]) {
+; NOLIT-NEXT:  [[ENTRY:.*:]]
+; NOLIT-NEXT:    [[SUM:%.*]] = add i64 [[X]], 1
+; NOLIT-NEXT:    ret i64 [[SUM]]
+;
+;
+; NOLIT-LABEL: define internal i64 @minus(
+; NOLIT-SAME: i64 [[X:%.*]]) {
+; NOLIT-NEXT:  [[ENTRY:.*:]]
+; NOLIT-NEXT:    [[DIFF:%.*]] = sub i64 [[X]], 1
+; NOLIT-NEXT:    ret i64 [[DIFF]]
+;
+;
+; NOLIT-LABEL: define internal ptr @select_binop.specialized.1(
+; NOLIT-SAME: ptr [[FLAG:%.*]]) {
+; NOLIT-NEXT:    ret ptr poison
+;
+;
+; NOLIT-LABEL: define internal ptr @select_binop.specialized.2(
+; NOLIT-SAME: ptr [[FLAG:%.*]]) {
+; NOLIT-NEXT:    ret ptr poison
+;

>From 18392646fdae97be03ef1235f989c163a17f75a1 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 24 Oct 2024 22:52:55 +0000
Subject: [PATCH 5/6] Clean up regression test

- Rename regression test to remove redundant function-specialization
  prefix
- Correct `binop` -> `op` and remove superfluous parameter
---
 ...rack-ptr-return.ll => track-ptr-return.ll} | 58 +++++++++----------
 1 file changed, 29 insertions(+), 29 deletions(-)
 rename llvm/test/Transforms/FunctionSpecialization/{function-specialization-track-ptr-return.ll => track-ptr-return.ll} (65%)

diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-track-ptr-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
similarity index 65%
rename from llvm/test/Transforms/FunctionSpecialization/function-specialization-track-ptr-return.ll
rename to llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
index 4ee5bbeb5d36e3..f4ba0e72a1b439 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-track-ptr-return.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
@@ -11,24 +11,24 @@
 
 define i64 @main() {
 entry:
-  %binop1 = call ptr @select_binop(ptr @global_true)
-  %binop2 = call ptr @select_binop(ptr @global_false)
+  %op1 = call ptr @select_op(ptr @global_true)
+  %op2 = call ptr @select_op(ptr @global_false)
 
-  %c1 = call i64 @compute(ptr %binop1)
-  %c2 = call i64 @compute(ptr %binop2)
+  %c1 = call i64 @compute(ptr %op1)
+  %c2 = call i64 @compute(ptr %op2)
   %add = add i64 %c1, %c2
   ret i64 %add
 }
 
-define ptr @select_binop(ptr %flag) {
+define ptr @select_op(ptr %flag) {
   %flag.val = load i1, ptr %flag
-  %binop = select i1 %flag.val, ptr @plus, ptr @minus
-  ret ptr %binop
+  %op = select i1 %flag.val, ptr @plus, ptr @minus
+  ret ptr %op
 }
 
-define internal i64 @compute(ptr %binop) {
+define internal i64 @compute(ptr %op) {
 entry:
-  %res = call i64 %binop(i64 1, i64 1)
+  %res = call i64 %op(i64 1)
   ret i64 %res
 }
 
@@ -45,19 +45,19 @@ entry:
 }
 ; CHECK-LABEL: define i64 @main() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[BINOP1:%.*]] = call ptr @select_binop.specialized.1(ptr @global_true)
-; CHECK-NEXT:    [[BINOP2:%.*]] = call ptr @select_binop.specialized.2(ptr @global_false)
+; CHECK-NEXT:    [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true)
+; CHECK-NEXT:    [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false)
 ; CHECK-NEXT:    [[C1:%.*]] = call i64 @compute.specialized.3(ptr @plus)
 ; CHECK-NEXT:    [[C2:%.*]] = call i64 @compute.specialized.4(ptr @minus)
 ; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[C1]], [[C2]]
 ; CHECK-NEXT:    ret i64 [[ADD]]
 ;
 ;
-; CHECK-LABEL: define ptr @select_binop(
+; CHECK-LABEL: define ptr @select_op(
 ; CHECK-SAME: ptr [[FLAG:%.*]]) {
 ; CHECK-NEXT:    [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1
-; CHECK-NEXT:    [[BINOP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus
-; CHECK-NEXT:    ret ptr [[BINOP]]
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus
+; CHECK-NEXT:    ret ptr [[OP]]
 ;
 ;
 ; CHECK-LABEL: define internal i64 @plus(
@@ -74,51 +74,51 @@ entry:
 ; CHECK-NEXT:    ret i64 [[DIFF]]
 ;
 ;
-; CHECK-LABEL: define internal ptr @select_binop.specialized.1(
+; CHECK-LABEL: define internal ptr @select_op.specialized.1(
 ; CHECK-SAME: ptr [[FLAG:%.*]]) {
 ; CHECK-NEXT:    ret ptr poison
 ;
 ;
-; CHECK-LABEL: define internal ptr @select_binop.specialized.2(
+; CHECK-LABEL: define internal ptr @select_op.specialized.2(
 ; CHECK-SAME: ptr [[FLAG:%.*]]) {
 ; CHECK-NEXT:    ret ptr poison
 ;
 ;
 ; CHECK-LABEL: define internal i64 @compute.specialized.3(
-; CHECK-SAME: ptr [[BINOP:%.*]]) {
+; CHECK-SAME: ptr [[OP:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[RES:%.*]] = call i64 @plus(i64 1, i64 1)
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @plus(i64 1)
 ; CHECK-NEXT:    ret i64 [[RES]]
 ;
 ;
 ; CHECK-LABEL: define internal i64 @compute.specialized.4(
-; CHECK-SAME: ptr [[BINOP:%.*]]) {
+; CHECK-SAME: ptr [[OP:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[RES:%.*]] = call i64 @minus(i64 1, i64 1)
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @minus(i64 1)
 ; CHECK-NEXT:    ret i64 [[RES]]
 ;
 ;
 ; NOLIT-LABEL: define i64 @main() {
 ; NOLIT-NEXT:  [[ENTRY:.*:]]
-; NOLIT-NEXT:    [[BINOP1:%.*]] = call ptr @select_binop.specialized.1(ptr @global_true)
-; NOLIT-NEXT:    [[BINOP2:%.*]] = call ptr @select_binop.specialized.2(ptr @global_false)
+; NOLIT-NEXT:    [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true)
+; NOLIT-NEXT:    [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false)
 ; NOLIT-NEXT:    [[C1:%.*]] = call i64 @compute(ptr @plus)
 ; NOLIT-NEXT:    [[C2:%.*]] = call i64 @compute(ptr @minus)
 ; NOLIT-NEXT:    [[ADD:%.*]] = add i64 [[C1]], [[C2]]
 ; NOLIT-NEXT:    ret i64 [[ADD]]
 ;
 ;
-; NOLIT-LABEL: define ptr @select_binop(
+; NOLIT-LABEL: define ptr @select_op(
 ; NOLIT-SAME: ptr [[FLAG:%.*]]) {
 ; NOLIT-NEXT:    [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1
-; NOLIT-NEXT:    [[BINOP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus
-; NOLIT-NEXT:    ret ptr [[BINOP]]
+; NOLIT-NEXT:    [[OP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus
+; NOLIT-NEXT:    ret ptr [[OP]]
 ;
 ;
 ; NOLIT-LABEL: define internal i64 @compute(
-; NOLIT-SAME: ptr [[BINOP:%.*]]) {
+; NOLIT-SAME: ptr [[OP:%.*]]) {
 ; NOLIT-NEXT:  [[ENTRY:.*:]]
-; NOLIT-NEXT:    [[RES:%.*]] = call i64 [[BINOP]](i64 1, i64 1)
+; NOLIT-NEXT:    [[RES:%.*]] = call i64 [[OP]](i64 1)
 ; NOLIT-NEXT:    ret i64 [[RES]]
 ;
 ;
@@ -136,12 +136,12 @@ entry:
 ; NOLIT-NEXT:    ret i64 [[DIFF]]
 ;
 ;
-; NOLIT-LABEL: define internal ptr @select_binop.specialized.1(
+; NOLIT-LABEL: define internal ptr @select_op.specialized.1(
 ; NOLIT-SAME: ptr [[FLAG:%.*]]) {
 ; NOLIT-NEXT:    ret ptr poison
 ;
 ;
-; NOLIT-LABEL: define internal ptr @select_binop.specialized.2(
+; NOLIT-LABEL: define internal ptr @select_op.specialized.2(
 ; NOLIT-SAME: ptr [[FLAG:%.*]]) {
 ; NOLIT-NEXT:    ret ptr poison
 ;

>From 23bc1e00093644affe5aa17c063f3902fbba699f Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Mon, 28 Oct 2024 10:01:48 +0000
Subject: [PATCH 6/6] Add Limitations comment to header

---
 llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5920dde9d77dfd..8c3e6a9a4d522f 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -64,6 +64,13 @@
 // - Perhaps a post-inlining function specialization pass could be more
 //   aggressive on literal constants.
 //
+// Limitations:
+// ------
+// - We are unable to consider specializations of functions called from indirect
+//   callsites whose pointer operand has a lattice value that is known to be
+//   constant, either from IPSCCP or previous iterations of FuncSpec. This is
+//   because SCCP has not yet replaced their uses.
+//
 // References:
 // -----------
 // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable



More information about the llvm-commits mailing list