[llvm] [FuncSpec] Handle ssa_copy intrinsic calls in InstCostVisitor (PR #114247)
Hari Limaye via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 1 09:13:07 PDT 2024
https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/114247
>From acfdc09c9b878aef2c2f5329a2bf209eba2ba5f6 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 31 Oct 2024 09:39:26 +0000
Subject: [PATCH 1/3] [FuncSpec] Precommit test for looking through
llvm.ssa.copy
---
.../FunctionSpecialization/ssa-copy.ll | 67 +++++++++++++++++++
1 file changed, 67 insertions(+)
create mode 100644 llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
diff --git a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
new file mode 100644
index 00000000000000..06361ced5c3b5b
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \
+; RUN: -funcspec-for-literal-constant=true \
+; RUN: -funcspec-min-codesize-savings=50 \
+; RUN: -funcspec-min-latency-savings=0 \
+; RUN: -S < %s | FileCheck %s
+
+; Verify that we are able to estimate the codesize savings by looking through
+; calls to ssa_copy intrinsics, which are inserted by PredicateInfo when IPSCCP
+; is run prior to FunctionSpecialization.
+; FIXME: We should be able to specialize this, but we currently do not handle
+; FIXME: llvm.ssa.copy calls in InstCostVisitor.
+define i32 @main() {
+entry:
+ %res = call i32 @test_ssa_copy(i32 0)
+ ret i32 %res
+}
+
+define i32 @test_ssa_copy(i32 %x) {
+entry:
+ br label %block1
+
+block1:
+ %cmp = icmp eq i32 %x, 0
+ br i1 %cmp, label %block2, label %exit1
+
+block2:
+ br i1 %cmp, label %block3, label %exit2
+
+block3:
+ br i1 %cmp, label %exit4, label %exit3
+
+exit1:
+ ret i32 %x
+
+exit2:
+ ret i32 %x
+
+exit3:
+ ret i32 %x
+
+exit4:
+ ret i32 999
+}
+
+; CHECK-LABEL: define range(i32 1, 0) i32 @main() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @test_ssa_copy(i32 0)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[BLOCK1:.*]]
+; CHECK: [[BLOCK1]]:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
+; CHECK: [[BLOCK2]]:
+; CHECK-NEXT: br label %[[BLOCK3:.*]]
+; CHECK: [[BLOCK3]]:
+; CHECK-NEXT: br label %[[EXIT4:.*]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: ret i32 [[X]]
+; CHECK: [[EXIT4]]:
+; CHECK-NEXT: ret i32 999
+;
>From 301880c5335ef02d34d0554888625c4244e54e66 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 29 Oct 2024 12:39:05 +0000
Subject: [PATCH 2/3] [FuncSpec] Handle ssa_copy intrinsic calls in
InstCostVisitor
Look through ssa_copy intrinsic calls when computing the codesize bonus
for a specialization.
Also remove the now-redundant logic that skipped ssa_copy intrinsics when
computing the codesize bonus, since these are now considered zero-cost by
TTI (in PR #75294).
---
.../Transforms/IPO/FunctionSpecialization.cpp | 12 ++++++----
.../FunctionSpecialization/ssa-copy.ll | 22 ++++++++++++++-----
2 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 919d3143a13f7e..1efec22624dd4f 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -120,10 +120,6 @@ Cost InstCostVisitor::estimateBasicBlocks(
continue;
for (Instruction &I : *BB) {
- // Disregard SSA copies.
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::ssa_copy)
- continue;
// If it's a known constant we have already accounted for it.
if (KnownConstants.contains(&I))
continue;
@@ -402,6 +398,14 @@ Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
}
Constant *InstCostVisitor::visitCallBase(CallBase &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ // Look through calls to ssa_copy intrinsics.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I);
+ II && II->getIntrinsicID() == Intrinsic::ssa_copy) {
+ return LastVisited->second;
+ }
+
Function *F = I.getCalledFunction();
if (!F || !canConstantFoldCallTo(&I, F))
return nullptr;
diff --git a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
index 06361ced5c3b5b..a13a2fe3190842 100644
--- a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
@@ -8,8 +8,6 @@
; Verify that we are able to estimate the codesize savings by looking through
; calls to ssa_copy intrinsics, which are inserted by PredicateInfo when IPSCCP
; is run prior to FunctionSpecialization.
-; FIXME: We should be able to specialize this, but we currently do not handle
-; FIXME: llvm.ssa.copy calls in InstCostVisitor.
define i32 @main() {
entry:
%res = call i32 @test_ssa_copy(i32 0)
@@ -43,10 +41,10 @@ exit4:
ret i32 999
}
-; CHECK-LABEL: define range(i32 1, 0) i32 @main() {
+; CHECK-LABEL: define i32 @main() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[RES:%.*]] = call i32 @test_ssa_copy(i32 0)
-; CHECK-NEXT: ret i32 [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @test_ssa_copy.specialized.1(i32 0)
+; CHECK-NEXT: ret i32 999
;
;
; CHECK-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
@@ -65,3 +63,17 @@ exit4:
; CHECK: [[EXIT4]]:
; CHECK-NEXT: ret i32 999
;
+;
+; CHECK-LABEL: define internal i32 @test_ssa_copy.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[BLOCK1:.*]]
+; CHECK: [[BLOCK1]]:
+; CHECK-NEXT: br label %[[BLOCK2:.*]]
+; CHECK: [[BLOCK2]]:
+; CHECK-NEXT: br label %[[BLOCK3:.*]]
+; CHECK: [[BLOCK3]]:
+; CHECK-NEXT: br label %[[EXIT4:.*]]
+; CHECK: [[EXIT4]]:
+; CHECK-NEXT: ret i32 poison
+;
>From afd2da5386386de28a836b625ed3221036f0d26b Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Fri, 1 Nov 2024 16:10:03 +0000
Subject: [PATCH 3/3] Add RUN line for checking predicateinfo to regression
test
---
.../FunctionSpecialization/ssa-copy.ll | 100 ++++++++++++------
1 file changed, 67 insertions(+), 33 deletions(-)
diff --git a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
index a13a2fe3190842..aaafe294480b24 100644
--- a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
@@ -1,9 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes=print-predicateinfo -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=PREDINF
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \
; RUN: -funcspec-for-literal-constant=true \
; RUN: -funcspec-min-codesize-savings=50 \
; RUN: -funcspec-min-latency-savings=0 \
-; RUN: -S < %s | FileCheck %s
+; RUN: -S < %s | FileCheck %s --check-prefix=FUNCSPEC
; Verify that we are able to estimate the codesize savings by looking through
; calls to ssa_copy intrinsics, which are inserted by PredicateInfo when IPSCCP
@@ -40,40 +41,73 @@ exit3:
exit4:
ret i32 999
}
-
-; CHECK-LABEL: define i32 @main() {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[RES:%.*]] = call i32 @test_ssa_copy.specialized.1(i32 0)
-; CHECK-NEXT: ret i32 999
+; PREDINF-LABEL: define i32 @main() {
+; PREDINF-NEXT: [[ENTRY:.*:]]
+; PREDINF-NEXT: [[RES:%.*]] = call i32 @test_ssa_copy(i32 0)
+; PREDINF-NEXT: ret i32 [[RES]]
+;
+;
+; PREDINF-LABEL: define i32 @test_ssa_copy(
+; PREDINF-SAME: i32 [[X:%.*]]) {
+; PREDINF-NEXT: [[ENTRY:.*:]]
+; PREDINF-NEXT: br label %[[BLOCK1:.*]]
+; PREDINF: [[BLOCK1]]:
+; PREDINF-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; PREDINF: [[CMP_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[CMP]])
+; PREDINF: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; PREDINF: [[X_4:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; PREDINF-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
+; PREDINF: [[BLOCK2]]:
+; PREDINF: [[CMP_0_1:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[CMP_0]])
+; PREDINF: [[X_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
+; PREDINF: [[X_0_3:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
+; PREDINF-NEXT: br i1 [[CMP_0]], label %[[BLOCK3:.*]], label %[[EXIT2:.*]]
+; PREDINF: [[BLOCK3]]:
+; PREDINF: [[X_0_1_2:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0_1]])
+; PREDINF-NEXT: br i1 [[CMP_0_1]], label %[[EXIT4:.*]], label %[[EXIT3:.*]]
+; PREDINF: [[EXIT1]]:
+; PREDINF-NEXT: ret i32 [[X_4]]
+; PREDINF: [[EXIT2]]:
+; PREDINF-NEXT: ret i32 [[X_0_3]]
+; PREDINF: [[EXIT3]]:
+; PREDINF-NEXT: ret i32 [[X_0_1_2]]
+; PREDINF: [[EXIT4]]:
+; PREDINF-NEXT: ret i32 999
+;
+;
+; FUNCSPEC-LABEL: define i32 @main() {
+; FUNCSPEC-NEXT: [[ENTRY:.*:]]
+; FUNCSPEC-NEXT: [[RES:%.*]] = call i32 @test_ssa_copy.specialized.1(i32 0)
+; FUNCSPEC-NEXT: ret i32 999
;
;
-; CHECK-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
-; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[BLOCK1:.*]]
-; CHECK: [[BLOCK1]]:
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
-; CHECK: [[BLOCK2]]:
-; CHECK-NEXT: br label %[[BLOCK3:.*]]
-; CHECK: [[BLOCK3]]:
-; CHECK-NEXT: br label %[[EXIT4:.*]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: ret i32 [[X]]
-; CHECK: [[EXIT4]]:
-; CHECK-NEXT: ret i32 999
+; FUNCSPEC-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
+; FUNCSPEC-SAME: i32 [[X:%.*]]) {
+; FUNCSPEC-NEXT: [[ENTRY:.*:]]
+; FUNCSPEC-NEXT: br label %[[BLOCK1:.*]]
+; FUNCSPEC: [[BLOCK1]]:
+; FUNCSPEC-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; FUNCSPEC-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
+; FUNCSPEC: [[BLOCK2]]:
+; FUNCSPEC-NEXT: br label %[[BLOCK3:.*]]
+; FUNCSPEC: [[BLOCK3]]:
+; FUNCSPEC-NEXT: br label %[[EXIT4:.*]]
+; FUNCSPEC: [[EXIT1]]:
+; FUNCSPEC-NEXT: ret i32 [[X]]
+; FUNCSPEC: [[EXIT4]]:
+; FUNCSPEC-NEXT: ret i32 999
;
;
-; CHECK-LABEL: define internal i32 @test_ssa_copy.specialized.1(
-; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[BLOCK1:.*]]
-; CHECK: [[BLOCK1]]:
-; CHECK-NEXT: br label %[[BLOCK2:.*]]
-; CHECK: [[BLOCK2]]:
-; CHECK-NEXT: br label %[[BLOCK3:.*]]
-; CHECK: [[BLOCK3]]:
-; CHECK-NEXT: br label %[[EXIT4:.*]]
-; CHECK: [[EXIT4]]:
-; CHECK-NEXT: ret i32 poison
+; FUNCSPEC-LABEL: define internal i32 @test_ssa_copy.specialized.1(
+; FUNCSPEC-SAME: i32 [[X:%.*]]) {
+; FUNCSPEC-NEXT: [[ENTRY:.*:]]
+; FUNCSPEC-NEXT: br label %[[BLOCK1:.*]]
+; FUNCSPEC: [[BLOCK1]]:
+; FUNCSPEC-NEXT: br label %[[BLOCK2:.*]]
+; FUNCSPEC: [[BLOCK2]]:
+; FUNCSPEC-NEXT: br label %[[BLOCK3:.*]]
+; FUNCSPEC: [[BLOCK3]]:
+; FUNCSPEC-NEXT: br label %[[EXIT4:.*]]
+; FUNCSPEC: [[EXIT4]]:
+; FUNCSPEC-NEXT: ret i32 poison
;
More information about the llvm-commits mailing list.