[llvm] [PGO] Make the PGO instrumentation insert point after alloca (PR #142043)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 29 14:46:02 PDT 2025


https://github.com/xur-llvm created https://github.com/llvm/llvm-project/pull/142043

We're changing PGO instrumentation to insert the intrinsic after alloca instructions. For sampled instrumentation, a conditional check guards the intrinsic. If the intrinsic comes before an alloca, the alloca (whose size might be unknown due to a PHI node) becomes conditional, resulting in inefficient code. We have seen some stack overflows due to this.

This patch guarantees the intrinsic is always after the alloca.

>From 75769f299b345ed43716726f4bef94c9d3e78fd4 Mon Sep 17 00:00:00 2001
From: Rong Xu <xur at google.com>
Date: Thu, 29 May 2025 14:22:55 -0700
Subject: [PATCH] [PGO] Make the PGO instrumentation insert point after alloca

We're changing PGO instrumentation to insert the intrinsic after
alloca instructions. For sampled instrumentation, a conditional
check guards the intrinsic. If the intrinsic comes before an
alloca, the alloca (whose size might be unknown due to a PHI
node) becomes conditional, resulting in inefficient code.
We have seen some stack overflows due to this.

This patch guarantees the intrinsic is always after the alloca.
---
 .../Instrumentation/PGOInstrumentation.cpp    |  8 ++--
 .../Transforms/PGOProfile/entry_alloca.ll     | 38 +++++++++++++++++++
 .../split-indirectbr-critical-edges.ll        |  1 +
 3 files changed, 43 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/entry_alloca.ll

diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index a063fb2ec3fe1..3347f3d376c94 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -855,7 +855,7 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
   auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
     // There are basic blocks (such as catchswitch) cannot be instrumented.
     // If the returned first insertion point is the end of BB, skip this BB.
-    if (BB->getFirstInsertionPt() == BB->end())
+    if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
       return nullptr;
     return BB;
   };
@@ -952,7 +952,7 @@ void FunctionInstrumenter::instrument() {
       Name, PointerType::get(M.getContext(), 0));
   if (PGOFunctionEntryCoverage) {
     auto &EntryBB = F.getEntryBlock();
-    IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
+    IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
     // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
     //                      i32 <index>)
     Builder.CreateIntrinsic(
@@ -1010,7 +1010,7 @@ void FunctionInstrumenter::instrument() {
   if (PGOTemporalInstrumentation) {
     NumCounters += PGOBlockCoverage ? 8 : 1;
     auto &EntryBB = F.getEntryBlock();
-    IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
+    IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
     // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
     //                          i32 <index>)
     Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
@@ -1021,7 +1021,7 @@ void FunctionInstrumenter::instrument() {
   }
 
   for (auto *InstrBB : InstrumentBBs) {
-    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+    IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
     assert(Builder.GetInsertPoint() != InstrBB->end() &&
            "Cannot get the Instrumentation point");
     // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
diff --git a/llvm/test/Transforms/PGOProfile/entry_alloca.ll b/llvm/test/Transforms/PGOProfile/entry_alloca.ll
new file mode 100644
index 0000000000000..bc06bd14d21ca
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/entry_alloca.ll
@@ -0,0 +1,38 @@
+; Note: Make sure that the instrumentation intrinsic is after the entry alloca.
+;RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s
+
+%struct.A = type { i32, [0 x i32] }
+
+; CHECK-LABEL: @foo()
+; CHECK-NEXT:   %1 = alloca %struct.A
+; CHECK-NEXT:   call void @llvm.instrprof.increment(ptr @__profn_foo
+; CHECK-NEXT:   call void @bar(ptr
+
+define dso_local i32 @foo() {
+  %1 = alloca %struct.A, align 4
+  call void @bar(ptr noundef nonnull %1) #3
+  %2 = load i32, ptr %1, align 4
+  %3 = icmp sgt i32 %2, 0
+  br i1 %3, label %4, label %15
+
+4:
+  %5 = getelementptr inbounds i8, ptr %1, i64 4
+  %6 = zext nneg i32 %2 to i64
+  br label %7
+
+7:
+  %8 = phi i64 [ 0, %4 ], [ %13, %7 ]
+  %9 = phi i32 [ 0, %4 ], [ %12, %7 ]
+  %10 = getelementptr inbounds [0 x i32], ptr %5, i64 0, i64 %8
+  %11 = load i32, ptr %10, align 4
+  %12 = add nsw i32 %11, %9
+  %13 = add nuw nsw i64 %8, 1
+  %14 = icmp eq i64 %13, %6
+  br i1 %14, label %15, label %7
+
+15:
+  %16 = phi i32 [ 0, %0 ], [ %12, %7 ]
+  ret i32 %16
+}
+
+declare void @bar(ptr noundef)
diff --git a/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll b/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll
index 8b92f8ccb51fb..cf022dd7c4e65 100644
--- a/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll
+++ b/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll
@@ -42,6 +42,7 @@ if.end:                                           ; preds = %if.end.preheader, %
 ;; The edge will not be profiled.
 ; CHECK-LABEL: @cannot_split(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %targets = alloca <2 x ptr>, align 16
 ; CHECK-NEXT:    call void @llvm.instrprof.increment
 ; CHECK: indirect:
 ; CHECK-NOT:     call void @llvm.instrprof.increment



More information about the llvm-commits mailing list