[llvm] [FuncSpec] Handle ssa_copy intrinsic calls in InstCostVisitor (PR #114247)

Hari Limaye via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 1 09:13:07 PDT 2024


https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/114247

>From acfdc09c9b878aef2c2f5329a2bf209eba2ba5f6 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 31 Oct 2024 09:39:26 +0000
Subject: [PATCH 1/3] [FuncSpec] Precommit test for looking through
 llvm.ssa.copy

---
 .../FunctionSpecialization/ssa-copy.ll        | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll

diff --git a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
new file mode 100644
index 00000000000000..06361ced5c3b5b
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1       \
+; RUN:                                 -funcspec-for-literal-constant=true \
+; RUN:                                 -funcspec-min-codesize-savings=50   \
+; RUN:                                 -funcspec-min-latency-savings=0     \
+; RUN:                                 -S < %s | FileCheck %s
+
+; Verify that we are able to estimate the codesize savings by looking through
+; calls to ssa_copy intrinsics, which are inserted by PredicateInfo when IPSCCP
+; is run prior to FunctionSpecialization.
+; FIXME: We should be able to specialize this, but we currently do not handle
+; FIXME: llvm.ssa.copy calls in InstCostVisitor.
+define i32 @main() {
+entry:
+  %res = call i32 @test_ssa_copy(i32 0)
+  ret i32 %res
+}
+
+define i32 @test_ssa_copy(i32 %x) {
+entry:
+  br label %block1
+
+block1:
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %block2, label %exit1
+
+block2:
+  br i1 %cmp, label %block3, label %exit2
+
+block3:
+  br i1 %cmp, label %exit4, label %exit3
+
+exit1:
+  ret i32 %x
+
+exit2:
+  ret i32 %x
+
+exit3:
+  ret i32 %x
+
+exit4:
+  ret i32 999
+}
+
+; CHECK-LABEL: define range(i32 1, 0) i32 @main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @test_ssa_copy(i32 0)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[BLOCK1:.*]]
+; CHECK:       [[BLOCK1]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
+; CHECK:       [[BLOCK2]]:
+; CHECK-NEXT:    br label %[[BLOCK3:.*]]
+; CHECK:       [[BLOCK3]]:
+; CHECK-NEXT:    br label %[[EXIT4:.*]]
+; CHECK:       [[EXIT1]]:
+; CHECK-NEXT:    ret i32 [[X]]
+; CHECK:       [[EXIT4]]:
+; CHECK-NEXT:    ret i32 999
+;

>From 301880c5335ef02d34d0554888625c4244e54e66 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 29 Oct 2024 12:39:05 +0000
Subject: [PATCH 2/3] [FuncSpec] Handle ssa_copy intrinsic calls in
 InstCostVisitor

Look through ssa_copy intrinsic calls when computing the codesize bonus
for a specialization.

Also remove the redundant logic that skipped computing the codesize
bonus for ssa_copy intrinsics, now that these are considered zero-cost
by TTI (as of PR #75294).
---
 .../Transforms/IPO/FunctionSpecialization.cpp | 12 ++++++----
 .../FunctionSpecialization/ssa-copy.ll        | 22 ++++++++++++++-----
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 919d3143a13f7e..1efec22624dd4f 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -120,10 +120,6 @@ Cost InstCostVisitor::estimateBasicBlocks(
       continue;
 
     for (Instruction &I : *BB) {
-      // Disregard SSA copies.
-      if (auto *II = dyn_cast<IntrinsicInst>(&I))
-        if (II->getIntrinsicID() == Intrinsic::ssa_copy)
-          continue;
       // If it's a known constant we have already accounted for it.
       if (KnownConstants.contains(&I))
         continue;
@@ -402,6 +398,14 @@ Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
 }
 
 Constant *InstCostVisitor::visitCallBase(CallBase &I) {
+  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+  // Look through calls to ssa_copy intrinsics.
+  if (auto *II = dyn_cast<IntrinsicInst>(&I);
+      II && II->getIntrinsicID() == Intrinsic::ssa_copy) {
+    return LastVisited->second;
+  }
+
   Function *F = I.getCalledFunction();
   if (!F || !canConstantFoldCallTo(&I, F))
     return nullptr;
diff --git a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
index 06361ced5c3b5b..a13a2fe3190842 100644
--- a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
@@ -8,8 +8,6 @@
 ; Verify that we are able to estimate the codesize savings by looking through
 ; calls to ssa_copy intrinsics, which are inserted by PredicateInfo when IPSCCP
 ; is run prior to FunctionSpecialization.
-; FIXME: We should be able to specialize this, but we currently do not handle
-; FIXME: llvm.ssa.copy calls in InstCostVisitor.
 define i32 @main() {
 entry:
   %res = call i32 @test_ssa_copy(i32 0)
@@ -43,10 +41,10 @@ exit4:
   ret i32 999
 }
 
-; CHECK-LABEL: define range(i32 1, 0) i32 @main() {
+; CHECK-LABEL: define i32 @main() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @test_ssa_copy(i32 0)
-; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @test_ssa_copy.specialized.1(i32 0)
+; CHECK-NEXT:    ret i32 999
 ;
 ;
 ; CHECK-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
@@ -65,3 +63,17 @@ exit4:
 ; CHECK:       [[EXIT4]]:
 ; CHECK-NEXT:    ret i32 999
 ;
+;
+; CHECK-LABEL: define internal i32 @test_ssa_copy.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[BLOCK1:.*]]
+; CHECK:       [[BLOCK1]]:
+; CHECK-NEXT:    br label %[[BLOCK2:.*]]
+; CHECK:       [[BLOCK2]]:
+; CHECK-NEXT:    br label %[[BLOCK3:.*]]
+; CHECK:       [[BLOCK3]]:
+; CHECK-NEXT:    br label %[[EXIT4:.*]]
+; CHECK:       [[EXIT4]]:
+; CHECK-NEXT:    ret i32 poison
+;

>From afd2da5386386de28a836b625ed3221036f0d26b Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Fri, 1 Nov 2024 16:10:03 +0000
Subject: [PATCH 3/3] Add RUN line for checking predicateinfo to regression
 test

---
 .../FunctionSpecialization/ssa-copy.ll        | 100 ++++++++++++------
 1 file changed, 67 insertions(+), 33 deletions(-)

diff --git a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
index a13a2fe3190842..aaafe294480b24 100644
--- a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll
@@ -1,9 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes=print-predicateinfo -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=PREDINF
 ; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1       \
 ; RUN:                                 -funcspec-for-literal-constant=true \
 ; RUN:                                 -funcspec-min-codesize-savings=50   \
 ; RUN:                                 -funcspec-min-latency-savings=0     \
-; RUN:                                 -S < %s | FileCheck %s
+; RUN:                                 -S < %s | FileCheck %s --check-prefix=FUNCSPEC
 
 ; Verify that we are able to estimate the codesize savings by looking through
 ; calls to ssa_copy intrinsics, which are inserted by PredicateInfo when IPSCCP
@@ -40,40 +41,73 @@ exit3:
 exit4:
   ret i32 999
 }
-
-; CHECK-LABEL: define i32 @main() {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @test_ssa_copy.specialized.1(i32 0)
-; CHECK-NEXT:    ret i32 999
+; PREDINF-LABEL: define i32 @main() {
+; PREDINF-NEXT:  [[ENTRY:.*:]]
+; PREDINF-NEXT:    [[RES:%.*]] = call i32 @test_ssa_copy(i32 0)
+; PREDINF-NEXT:    ret i32 [[RES]]
+;
+;
+; PREDINF-LABEL: define i32 @test_ssa_copy(
+; PREDINF-SAME: i32 [[X:%.*]]) {
+; PREDINF-NEXT:  [[ENTRY:.*:]]
+; PREDINF-NEXT:    br label %[[BLOCK1:.*]]
+; PREDINF:       [[BLOCK1]]:
+; PREDINF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; PREDINF:         [[CMP_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[CMP]])
+; PREDINF:         [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; PREDINF:         [[X_4:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; PREDINF-NEXT:    br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
+; PREDINF:       [[BLOCK2]]:
+; PREDINF:         [[CMP_0_1:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[CMP_0]])
+; PREDINF:         [[X_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
+; PREDINF:         [[X_0_3:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
+; PREDINF-NEXT:    br i1 [[CMP_0]], label %[[BLOCK3:.*]], label %[[EXIT2:.*]]
+; PREDINF:       [[BLOCK3]]:
+; PREDINF:         [[X_0_1_2:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0_1]])
+; PREDINF-NEXT:    br i1 [[CMP_0_1]], label %[[EXIT4:.*]], label %[[EXIT3:.*]]
+; PREDINF:       [[EXIT1]]:
+; PREDINF-NEXT:    ret i32 [[X_4]]
+; PREDINF:       [[EXIT2]]:
+; PREDINF-NEXT:    ret i32 [[X_0_3]]
+; PREDINF:       [[EXIT3]]:
+; PREDINF-NEXT:    ret i32 [[X_0_1_2]]
+; PREDINF:       [[EXIT4]]:
+; PREDINF-NEXT:    ret i32 999
+;
+;
+; FUNCSPEC-LABEL: define i32 @main() {
+; FUNCSPEC-NEXT:  [[ENTRY:.*:]]
+; FUNCSPEC-NEXT:    [[RES:%.*]] = call i32 @test_ssa_copy.specialized.1(i32 0)
+; FUNCSPEC-NEXT:    ret i32 999
 ;
 ;
-; CHECK-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
-; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[BLOCK1:.*]]
-; CHECK:       [[BLOCK1]]:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
-; CHECK:       [[BLOCK2]]:
-; CHECK-NEXT:    br label %[[BLOCK3:.*]]
-; CHECK:       [[BLOCK3]]:
-; CHECK-NEXT:    br label %[[EXIT4:.*]]
-; CHECK:       [[EXIT1]]:
-; CHECK-NEXT:    ret i32 [[X]]
-; CHECK:       [[EXIT4]]:
-; CHECK-NEXT:    ret i32 999
+; FUNCSPEC-LABEL: define range(i32 1, 0) i32 @test_ssa_copy(
+; FUNCSPEC-SAME: i32 [[X:%.*]]) {
+; FUNCSPEC-NEXT:  [[ENTRY:.*:]]
+; FUNCSPEC-NEXT:    br label %[[BLOCK1:.*]]
+; FUNCSPEC:       [[BLOCK1]]:
+; FUNCSPEC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; FUNCSPEC-NEXT:    br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
+; FUNCSPEC:       [[BLOCK2]]:
+; FUNCSPEC-NEXT:    br label %[[BLOCK3:.*]]
+; FUNCSPEC:       [[BLOCK3]]:
+; FUNCSPEC-NEXT:    br label %[[EXIT4:.*]]
+; FUNCSPEC:       [[EXIT1]]:
+; FUNCSPEC-NEXT:    ret i32 [[X]]
+; FUNCSPEC:       [[EXIT4]]:
+; FUNCSPEC-NEXT:    ret i32 999
 ;
 ;
-; CHECK-LABEL: define internal i32 @test_ssa_copy.specialized.1(
-; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[BLOCK1:.*]]
-; CHECK:       [[BLOCK1]]:
-; CHECK-NEXT:    br label %[[BLOCK2:.*]]
-; CHECK:       [[BLOCK2]]:
-; CHECK-NEXT:    br label %[[BLOCK3:.*]]
-; CHECK:       [[BLOCK3]]:
-; CHECK-NEXT:    br label %[[EXIT4:.*]]
-; CHECK:       [[EXIT4]]:
-; CHECK-NEXT:    ret i32 poison
+; FUNCSPEC-LABEL: define internal i32 @test_ssa_copy.specialized.1(
+; FUNCSPEC-SAME: i32 [[X:%.*]]) {
+; FUNCSPEC-NEXT:  [[ENTRY:.*:]]
+; FUNCSPEC-NEXT:    br label %[[BLOCK1:.*]]
+; FUNCSPEC:       [[BLOCK1]]:
+; FUNCSPEC-NEXT:    br label %[[BLOCK2:.*]]
+; FUNCSPEC:       [[BLOCK2]]:
+; FUNCSPEC-NEXT:    br label %[[BLOCK3:.*]]
+; FUNCSPEC:       [[BLOCK3]]:
+; FUNCSPEC-NEXT:    br label %[[EXIT4:.*]]
+; FUNCSPEC:       [[EXIT4]]:
+; FUNCSPEC-NEXT:    ret i32 poison
 ;



More information about the llvm-commits mailing list