[llvm] [FuncSpec] Query SCCPSolver in more places (PR #114964)

Hari Limaye via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 5 02:02:47 PST 2024


https://github.com/hazzlim created https://github.com/llvm/llvm-project/pull/114964

When traversing the use-def chain of an Argument in a candidate specialization, also query the SCCPSolver to see if a Value is constant. This allows us to better estimate the codesize savings of a candidate when an instruction that uses the argument under consideration also uses other arguments that IPSCCP has already found to be constant.

Similarly, when estimating the basic blocks that become dead once a branch or switch instruction's condition becomes constant, also query the SCCPSolver to see if a predecessor is unreachable.

From 1ba4b3d451be436130a826ea39e75fb7a3871b1e Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 31 Oct 2024 15:00:34 +0000
Subject: [PATCH] [FuncSpec] Query SCCPSolver in more places

When traversing the use-def chain of an Argument in a candidate
specialization, also query the SCCPSolver to see if a Value is constant.
This allows us to better estimate the codesize savings of a candidate
when an instruction that uses the argument under consideration also
uses other arguments that IPSCCP has already found to be constant.

Similarly, when estimating the basic blocks that become dead once a
branch or switch instruction's condition becomes constant, also query
the SCCPSolver to see if a predecessor is unreachable.
---
 .../Transforms/IPO/FunctionSpecialization.h   |  3 +-
 .../Transforms/IPO/FunctionSpecialization.cpp | 42 ++++++-----
 .../solver-constants.ll                       | 66 +++++++++++++++++
 .../solver-dead-blocks.ll                     | 74 +++++++++++++++++++
 4 files changed, 166 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/Transforms/FunctionSpecialization/solver-constants.ll
 create mode 100644 llvm/test/Transforms/FunctionSpecialization/solver-dead-blocks.ll

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index ff5af5988656a1..4c7fd9263036dd 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -190,7 +190,8 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   friend class InstVisitor<InstCostVisitor, Constant *>;
 
   static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
-                                    DenseSet<BasicBlock *> &DeadBlocks);
+                                    DenseSet<BasicBlock *> &DeadBlocks,
+                                    const SCCPSolver &Solver);
 
   Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr,
                                  Constant *C = nullptr);
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 17d8283255d245..ef3ebd71a866da 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -91,12 +91,14 @@ static cl::opt<bool> SpecializeLiteralConstant(
         "argument"));
 
 bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
-                                         DenseSet<BasicBlock *> &DeadBlocks) {
+                                            DenseSet<BasicBlock *> &DeadBlocks,
+                                            const SCCPSolver &Solver) {
   unsigned I = 0;
-  return all_of(predecessors(Succ),
-    [&I, BB, Succ, &DeadBlocks] (BasicBlock *Pred) {
+  return all_of(predecessors(Succ), [&I, BB, Succ, &DeadBlocks,
+                                     &Solver](BasicBlock *Pred) {
     return I++ < MaxBlockPredecessors &&
-      (Pred == BB || Pred == Succ || DeadBlocks.contains(Pred));
+           (Pred == BB || Pred == Succ || DeadBlocks.contains(Pred) ||
+            !Solver.isBlockExecutable(Pred));
   });
 }
 
@@ -135,15 +137,18 @@ Cost InstCostVisitor::estimateBasicBlocks(
     // executable and only reachable from dead blocks.
     for (BasicBlock *SuccBB : successors(BB))
       if (isBlockExecutable(SuccBB) &&
-          canEliminateSuccessor(BB, SuccBB, DeadBlocks))
+          canEliminateSuccessor(BB, SuccBB, DeadBlocks, Solver))
         WorkList.push_back(SuccBB);
   }
   return CodeSize;
 }
 
-static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
+static Constant *findConstantFor(Value *V, ConstMap &KnownConstants,
+                                 const SCCPSolver &Solver) {
   if (auto *C = dyn_cast<Constant>(V))
     return C;
+  if (auto *C = Solver.getConstantOrNull(V))
+    return C;
   return KnownConstants.lookup(V);
 }
 
@@ -266,7 +271,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
   for (const auto &Case : I.cases()) {
     BasicBlock *BB = Case.getCaseSuccessor();
     if (BB != Succ && isBlockExecutable(BB) &&
-        canEliminateSuccessor(I.getParent(), BB, DeadBlocks))
+        canEliminateSuccessor(I.getParent(), BB, DeadBlocks, Solver))
       WorkList.push_back(BB);
   }
 
@@ -284,7 +289,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
   // it is executable and has a unique predecessor.
   SmallVector<BasicBlock *> WorkList;
   if (isBlockExecutable(Succ) &&
-      canEliminateSuccessor(I.getParent(), Succ, DeadBlocks))
+      canEliminateSuccessor(I.getParent(), Succ, DeadBlocks, Solver))
     WorkList.push_back(Succ);
 
   return estimateBasicBlocks(WorkList);
@@ -312,10 +317,10 @@ bool InstCostVisitor::discoverTransitivelyIncomingValues(
 
       // Disregard self-references and dead incoming values.
       if (auto *Inst = dyn_cast<Instruction>(V))
-        if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
+        if (Inst == PN || !isBlockExecutable(PN->getIncomingBlock(I)))
           continue;
 
-      if (Constant *C = findConstantFor(V, KnownConstants)) {
+      if (Constant *C = findConstantFor(V, KnownConstants, Solver)) {
         // Not all incoming values are the same constant. Bail immediately.
         if (C != Const)
           return false;
@@ -347,10 +352,10 @@ Constant *InstCostVisitor::visitPHINode(PHINode &I) {
 
     // Disregard self-references and dead incoming values.
     if (auto *Inst = dyn_cast<Instruction>(V))
-      if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
+      if (Inst == &I || !isBlockExecutable(I.getIncomingBlock(Idx)))
         continue;
 
-    if (Constant *C = findConstantFor(V, KnownConstants)) {
+    if (Constant *C = findConstantFor(V, KnownConstants, Solver)) {
       if (!Const)
         Const = C;
       // Not all incoming values are the same constant. Bail immediately.
@@ -415,7 +420,7 @@ Constant *InstCostVisitor::visitCallBase(CallBase &I) {
 
   for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) {
     Value *V = I.getOperand(Idx);
-    Constant *C = findConstantFor(V, KnownConstants);
+    Constant *C = findConstantFor(V, KnownConstants, Solver);
     if (!C)
       return nullptr;
     Operands.push_back(C);
@@ -439,7 +444,7 @@ Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
 
   for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {
     Value *V = I.getOperand(Idx);
-    Constant *C = findConstantFor(V, KnownConstants);
+    Constant *C = findConstantFor(V, KnownConstants, Solver);
     if (!C)
       return nullptr;
     Operands.push_back(C);
@@ -455,9 +460,10 @@ Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
   if (I.getCondition() == LastVisited->first) {
     Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()
                                                   : I.getTrueValue();
-    return findConstantFor(V, KnownConstants);
+    return findConstantFor(V, KnownConstants, Solver);
   }
-  if (Constant *Condition = findConstantFor(I.getCondition(), KnownConstants))
+  if (Constant *Condition =
+          findConstantFor(I.getCondition(), KnownConstants, Solver))
     if ((I.getTrueValue() == LastVisited->first && Condition->isOneValue()) ||
         (I.getFalseValue() == LastVisited->first && Condition->isZeroValue()))
       return LastVisited->second;
@@ -475,7 +481,7 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
   Constant *Const = LastVisited->second;
   bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
   Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
-  Constant *Other = findConstantFor(V, KnownConstants);
+  Constant *Other = findConstantFor(V, KnownConstants, Solver);
 
   if (Other) {
     if (ConstOnRHS)
@@ -503,7 +509,7 @@ Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
 
   bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
   Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
-  Constant *Other = findConstantFor(V, KnownConstants);
+  Constant *Other = findConstantFor(V, KnownConstants, Solver);
   Value *OtherVal = Other ? Other : V;
   Value *ConstVal = LastVisited->second;
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/solver-constants.ll b/llvm/test/Transforms/FunctionSpecialization/solver-constants.ll
new file mode 100644
index 00000000000000..516fd5cb49bb76
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/solver-constants.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1       \
+; RUN:                                 -funcspec-for-literal-constant=true \
+; RUN:                                 -funcspec-min-codesize-savings=50   \
+; RUN:                                 -funcspec-min-latency-savings=0     \
+; RUN:                                 -S < %s | FileCheck %s
+
+; Verify that we are able to estimate the codesize savings arising from a branch
+; based on a binary operator, where one operand is already found constant by
+; IPSCCP.
+define i32 @main(i1 %flag) {
+  %notspec = call i32 @test(i1 %flag, i1 false)
+  %spec = call i32 @test(i1 false, i1 false)
+  %sum = add i32 %notspec, %spec
+  ret i32 %sum
+}
+
+define internal i32 @test(i1 %argflag, i1 %constflag) {
+entry:
+  %cond = or i1 %argflag, %constflag
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  br label %if.end
+
+if.end:
+  %res = phi i32 [ 0, %entry ], [ 1, %if.then]
+  ret i32 %res
+}
+
+declare void @do_something()
+; CHECK-LABEL: define range(i32 0, 2) i32 @main(
+; CHECK-SAME: i1 [[FLAG:%.*]]) {
+; CHECK-NEXT:    [[NOTSPEC:%.*]] = call i32 @test(i1 [[FLAG]], i1 false)
+; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @test.specialized.1(i1 false, i1 false)
+; CHECK-NEXT:    [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+;
+; CHECK-LABEL: define internal range(i32 0, 2) i32 @test(
+; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[COND:%.*]] = or i1 [[ARGFLAG]], false
+; CHECK-NEXT:    br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[IF_THEN]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @test.specialized.1(
+; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret i32 poison
+;
diff --git a/llvm/test/Transforms/FunctionSpecialization/solver-dead-blocks.ll b/llvm/test/Transforms/FunctionSpecialization/solver-dead-blocks.ll
new file mode 100644
index 00000000000000..05368e934ebb66
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/solver-dead-blocks.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1       \
+; RUN:                                 -funcspec-for-literal-constant=true \
+; RUN:                                 -funcspec-min-codesize-savings=50   \
+; RUN:                                 -funcspec-min-latency-savings=0     \
+; RUN:                                 -S < %s | FileCheck %s
+
+; Verify that we are able to estimate the codesize savings arising from a block
+; which is found dead, where the block has a predecessor that was found dead by
+; IPSCCP.
+define i32 @main(i1 %flag) {
+  %notspec = call i32 @test(i1 %flag, i1 true)
+  %spec = call i32 @test(i1 true, i1 true)
+  %sum = add i32 %notspec, %spec
+  ret i32 %sum
+}
+
+define internal i32 @test(i1 %argflag, i1 %constflag) {
+entry:
+  br i1 %argflag, label %block1, label %block3
+
+block1:
+  br i1 %constflag, label %end, label %block2
+
+block2:
+  br label %block3
+
+block3:
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  call void @do_something()
+  br label %end
+
+end:
+  %res = phi i32 [ 0, %block1 ], [ 1, %block3]
+  ret i32 %res
+}
+
+declare void @do_something()
+; CHECK-LABEL: define range(i32 0, 2) i32 @main(
+; CHECK-SAME: i1 [[FLAG:%.*]]) {
+; CHECK-NEXT:    [[NOTSPEC:%.*]] = call i32 @test(i1 [[FLAG]], i1 true)
+; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @test.specialized.1(i1 true, i1 true)
+; CHECK-NEXT:    [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+;
+; CHECK-LABEL: define internal range(i32 0, 2) i32 @test(
+; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[ARGFLAG]], label %[[BLOCK1:.*]], label %[[BLOCK3:.*]]
+; CHECK:       [[BLOCK1]]:
+; CHECK-NEXT:    br label %[[END:.*]]
+; CHECK:       [[BLOCK3]]:
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    call void @do_something()
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ 0, %[[BLOCK1]] ], [ 1, %[[BLOCK3]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @test.specialized.1(
+; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[BLOCK1:.*]]
+; CHECK:       [[BLOCK1]]:
+; CHECK-NEXT:    br label %[[END:.*]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret i32 poison
+;



More information about the llvm-commits mailing list