[llvm] [InlineCost] Simplify extractvalue across callsite (PR #145054)

Tobias Stadler via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 23 07:33:55 PDT 2025


https://github.com/tobias-stadler updated https://github.com/llvm/llvm-project/pull/145054

>From c210cf3ee74162c63e2dd5a3db957c04bb58e57c Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Fri, 20 Jun 2025 18:16:47 +0100
Subject: [PATCH 1/3] [InlineCost] Allow simplifying to non-Constant values

Allow mapping callee Values to arbitrary (non-Constant) simplified
values. The simplified values can also originate from the caller. This
enables us to simplify instructions in the callee with instructions from
the caller.

The first use case for this is simplifying extractvalues (#145054).
---
 llvm/lib/Analysis/InlineCost.cpp | 76 +++++++++++++++++++++-----------
 1 file changed, 51 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 7bd1f18004580..f0ecae758299e 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -391,7 +391,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// likely simplifications post-inlining. The most important aspect we track
   /// is CFG altering simplifications -- when we prove a basic block dead, that
   /// can cause dramatic shifts in the cost of inlining a function.
-  DenseMap<Value *, Constant *> SimplifiedValues;
+  /// Note: The simplified Value may be owned by the caller function.
+  DenseMap<Value *, Value *> SimplifiedValues;
 
   /// Keep track of the values which map back (through function arguments) to
   /// allocas on the caller stack which could be simplified through SROA.
@@ -432,7 +433,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   template <typename T> T *getDirectOrSimplifiedValue(Value *V) const {
     if (auto *Direct = dyn_cast<T>(V))
       return Direct;
-    return dyn_cast_if_present<T>(SimplifiedValues.lookup(V));
+    return getSimplifiedValue<T>(V);
   }
 
   // Custom simplification helper routines.
@@ -525,11 +526,33 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 
   InlineResult analyze();
 
-  std::optional<Constant *> getSimplifiedValue(Instruction *I) {
-    auto It = SimplifiedValues.find(I);
-    if (It != SimplifiedValues.end())
-      return It->second;
-    return std::nullopt;
+  // Lookup simplified Value. May return a value owned by the caller.
+  Value *getSimplifiedValueUnchecked(Value *V) const {
+    return SimplifiedValues.lookup(V);
+  }
+
+  // Lookup simplified Value, but return nullptr if the simplified value is
+  // owned by the caller.
+  template <typename T> T *getSimplifiedValue(Value *V) const {
+    Value *SimpleV = SimplifiedValues.lookup(V);
+    if (!SimpleV)
+      return nullptr;
+
+    // Skip the ownership checks if T is a Constant: constants are not owned by
+    // any function. This has a small, but measurable impact on compile-time.
+    if constexpr (std::is_base_of_v<Constant, T>)
+      return dyn_cast<T>(SimpleV);
+
+    // Make sure the simplified Value is owned by this function.
+    if (auto *I = dyn_cast<Instruction>(SimpleV)) {
+      if (I->getFunction() != &F)
+        return nullptr;
+    } else if (auto *Arg = dyn_cast<Argument>(SimpleV)) {
+      if (Arg->getParent() != &F)
+        return nullptr;
+    } else if (!isa<Constant>(SimpleV))
+      return nullptr;
+    return dyn_cast<T>(SimpleV);
   }
 
   // Keep a bunch of stats about the cost savings found so we can print them
@@ -921,12 +944,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
         if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
           // Count a conditional branch as savings if it becomes unconditional.
           if (BI->isConditional() &&
-              isa_and_nonnull<ConstantInt>(
-                  SimplifiedValues.lookup(BI->getCondition()))) {
+              getSimplifiedValue<ConstantInt>(BI->getCondition())) {
             CurrentSavings += InstrCost;
           }
         } else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
-          if (isa_and_present<ConstantInt>(SimplifiedValues.lookup(SI->getCondition())))
+          if (getSimplifiedValue<ConstantInt>(SI->getCondition()))
             CurrentSavings += InstrCost;
         } else if (Value *V = dyn_cast<Value>(&I)) {
           // Count an instruction as savings if we can fold it.
@@ -1423,10 +1445,17 @@ void InlineCostAnnotationWriter::emitInstructionAnnot(
     if (Record->hasThresholdChanged())
       OS << ", threshold delta = " << Record->getThresholdDelta();
   }
-  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
-  if (C) {
+  auto *V = ICCA->getSimplifiedValueUnchecked(const_cast<Instruction *>(I));
+  if (V) {
     OS << ", simplified to ";
-    (*C)->print(OS, true);
+    V->print(OS, true);
+    if (auto *VI = dyn_cast<Instruction>(V)) {
+      if (VI->getFunction() != I->getFunction())
+        OS << " (caller instruction)";
+    } else if (auto *VArg = dyn_cast<Argument>(V)) {
+      if (VArg->getParent() != I->getFunction())
+        OS << " (caller argument)";
+    }
   }
   OS << "\n";
 }
@@ -1483,7 +1512,7 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
   SmallVector<Value *, 4> Operands;
   Operands.push_back(GEP.getOperand(0));
   for (const Use &Op : GEP.indices())
-    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
+    if (Constant *SimpleOp = getSimplifiedValue<Constant>(Op))
       Operands.push_back(SimpleOp);
     else
       Operands.push_back(Op);
@@ -1498,7 +1527,7 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
   // Check whether inlining will turn a dynamic alloca into a static
   // alloca and handle that case.
   if (I.isArrayAllocation()) {
-    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
+    Constant *Size = getSimplifiedValue<Constant>(I.getArraySize());
     if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
       // Sometimes a dynamic alloca could be converted into a static alloca
       // after this constant prop, and become a huge static alloca on an
@@ -2388,7 +2417,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
     // Check if this happens to be an indirect function call to a known function
     // in this inline context. If not, we've done all we can.
     Value *Callee = Call.getCalledOperand();
-    F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+    F = getSimplifiedValue<Function>(Callee);
     if (!F || F->getFunctionType() != Call.getFunctionType()) {
       onCallArgumentSetup(Call);
 
@@ -2483,8 +2512,7 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
 
   Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal);
   Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal);
-  Constant *CondC =
-      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
+  Constant *CondC = getSimplifiedValue<Constant>(SI.getCondition());
 
   if (!CondC) {
     // Select C, X, X => X
@@ -2833,8 +2861,9 @@ InlineResult CallAnalyzer::analyze() {
   auto CAI = CandidateCall.arg_begin();
   for (Argument &FAI : F.args()) {
     assert(CAI != CandidateCall.arg_end());
-    if (Constant *C = dyn_cast<Constant>(CAI))
-      SimplifiedValues[&FAI] = C;
+    SimplifiedValues[&FAI] = *CAI;
+    if (isa<Constant>(*CAI))
+      ++NumConstantArgs;
 
     Value *PtrArg = *CAI;
     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
@@ -2849,7 +2878,6 @@ InlineResult CallAnalyzer::analyze() {
     }
     ++CAI;
   }
-  NumConstantArgs = SimplifiedValues.size();
   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
   NumAllocaArgs = SROAArgValues.size();
 
@@ -2911,8 +2939,7 @@ InlineResult CallAnalyzer::analyze() {
     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
       if (BI->isConditional()) {
         Value *Cond = BI->getCondition();
-        if (ConstantInt *SimpleCond =
-                dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+        if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(Cond)) {
           BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
           BBWorklist.insert(NextBB);
           KnownSuccessors[BB] = NextBB;
@@ -2922,8 +2949,7 @@ InlineResult CallAnalyzer::analyze() {
       }
     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
       Value *Cond = SI->getCondition();
-      if (ConstantInt *SimpleCond =
-              dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+      if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(Cond)) {
         BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
         BBWorklist.insert(NextBB);
         KnownSuccessors[BB] = NextBB;

>From 68cbd6e096ca97558e045c5dbc486b6c4119c7f1 Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Mon, 23 Jun 2025 14:29:22 +0100
Subject: [PATCH 2/3] Fix comments

---
 llvm/lib/Analysis/InlineCost.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index f0ecae758299e..fe1ceb74429c9 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -526,13 +526,13 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 
   InlineResult analyze();
 
-  // Lookup simplified Value. May return a value owned by the caller.
+  /// Lookup simplified Value. May return a value owned by the caller.
   Value *getSimplifiedValueUnchecked(Value *V) const {
     return SimplifiedValues.lookup(V);
   }
 
-  // Lookup simplified Value, but return nullptr if the simplified value is
-  // owned by the caller.
+  /// Lookup simplified Value, but return nullptr if the simplified value is
+  /// owned by the caller.
   template <typename T> T *getSimplifiedValue(Value *V) const {
     Value *SimpleV = SimplifiedValues.lookup(V);
     if (!SimpleV)

>From 875e3d9c2f7105fc8aff94b6b0eedc8adecca50f Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Wed, 18 Jun 2025 23:31:35 +0100
Subject: [PATCH 3/3] [InlineCost] Simplify extractvalue across callsite

---
 llvm/lib/Analysis/InlineCost.cpp              | 15 +++++--
 .../Inline/simplify-crosscallsite.ll          | 39 +++++++++++++++++++
 2 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/simplify-crosscallsite.ll

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index fe1ceb74429c9..773a60479ae22 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -2316,9 +2316,18 @@ bool CallAnalyzer::visitStore(StoreInst &I) {
 }
 
 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
-  // Constant folding for extract value is trivial.
-  if (simplifyInstruction(I))
-    return true;
+  Value *Op = I.getAggregateOperand();
+
+  // Special handling, because we want to simplify extractvalue with a
+  // potential insertvalue from the caller.
+  if (Value *SimpleOp = getSimplifiedValueUnchecked(Op)) {
+    SimplifyQuery SQ(DL);
+    Value *SimpleV = simplifyExtractValueInst(SimpleOp, I.getIndices(), SQ);
+    if (SimpleV) {
+      SimplifiedValues[&I] = SimpleV;
+      return true;
+    }
+  }
 
   // SROA can't look through these, but they may be free.
   return Base::visitExtractValue(I);
diff --git a/llvm/test/Transforms/Inline/simplify-crosscallsite.ll b/llvm/test/Transforms/Inline/simplify-crosscallsite.ll
new file mode 100644
index 0000000000000..0c0207a4883da
--- /dev/null
+++ b/llvm/test/Transforms/Inline/simplify-crosscallsite.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=inline | FileCheck %s
+
+define i32 @callee([2 x i32] %agg) {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: [2 x i32] [[AGG:%.*]]) {
+; CHECK-NEXT:    [[V:%.*]] = extractvalue [2 x i32] [[AGG]], 0
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[C]], label %[[IS_NULL:.*]], label %[[NON_NULL:.*]]
+; CHECK:       [[IS_NULL]]:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       [[NON_NULL]]:
+; CHECK-NEXT:    [[R:%.*]] = call i32 @callee([2 x i32] [[AGG]])
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %v = extractvalue [2 x i32] %agg, 0
+  %c = icmp eq i32 %v, 0
+  br i1 %c, label %is_null, label %non_null
+
+is_null:
+  ret i32 0
+
+non_null:
+  %r = call i32 @callee([2 x i32] %agg)
+  ret i32 %r
+}
+
+define i32 @caller(i32 %arg) {
+; CHECK-LABEL: define i32 @caller(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[AGG0:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-NEXT:    [[AGG1:%.*]] = insertvalue [2 x i32] [[AGG0]], i32 [[ARG]], 1
+; CHECK-NEXT:    ret i32 0
+;
+  %agg0 = insertvalue [2 x i32] poison, i32 0, 0
+  %agg1 = insertvalue [2 x i32] %agg0, i32 %arg, 1
+  %v = call i32 @callee([2 x i32] %agg1)
+  ret i32 %v
+}



More information about the llvm-commits mailing list