[llvm] a004c70 - [PGO] Make the PGO instrumentation insert point after alloca (#142043)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 30 14:37:09 PDT 2025


Author: xur-llvm
Date: 2025-05-30T14:37:06-07:00
New Revision: a004c703bcf7a563415ca872c98a415edda36e6e

URL: https://github.com/llvm/llvm-project/commit/a004c703bcf7a563415ca872c98a415edda36e6e
DIFF: https://github.com/llvm/llvm-project/commit/a004c703bcf7a563415ca872c98a415edda36e6e.diff

LOG: [PGO] Make the PGO instrumentation insert point after alloca (#142043)

This patch changes PGO instrumentation to insert the intrinsic after alloca
instructions. For sampled instrumentation, a conditional check is placed
before the intrinsic. If the intrinsic comes before an alloca, the
alloca (whose size might be unknown due to a PHI node) becomes
conditional, resulting in inefficient code. We have seen some stack
overflows caused by this.

This patch guarantees the intrinsic is always after the alloca.

Added: 
    llvm/test/Transforms/PGOProfile/entry_alloca.ll

Modified: 
    llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index a063fb2ec3fe1..3347f3d376c94 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -855,7 +855,7 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
   auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
     // There are basic blocks (such as catchswitch) cannot be instrumented.
     // If the returned first insertion point is the end of BB, skip this BB.
-    if (BB->getFirstInsertionPt() == BB->end())
+    if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
       return nullptr;
     return BB;
   };
@@ -952,7 +952,7 @@ void FunctionInstrumenter::instrument() {
       Name, PointerType::get(M.getContext(), 0));
   if (PGOFunctionEntryCoverage) {
     auto &EntryBB = F.getEntryBlock();
-    IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
+    IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
     // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
     //                      i32 <index>)
     Builder.CreateIntrinsic(
@@ -1010,7 +1010,7 @@ void FunctionInstrumenter::instrument() {
   if (PGOTemporalInstrumentation) {
     NumCounters += PGOBlockCoverage ? 8 : 1;
     auto &EntryBB = F.getEntryBlock();
-    IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
+    IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
     // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
     //                          i32 <index>)
     Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
@@ -1021,7 +1021,7 @@ void FunctionInstrumenter::instrument() {
   }
 
   for (auto *InstrBB : InstrumentBBs) {
-    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+    IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
     assert(Builder.GetInsertPoint() != InstrBB->end() &&
            "Cannot get the Instrumentation point");
     // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,

diff  --git a/llvm/test/Transforms/PGOProfile/entry_alloca.ll b/llvm/test/Transforms/PGOProfile/entry_alloca.ll
new file mode 100644
index 0000000000000..580f055921970
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/entry_alloca.ll
@@ -0,0 +1,56 @@
+; Note: Make sure that the instrumentation intrinsic is after the entry allocas.
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -sampled-instrumentation -S | FileCheck %s --check-prefixes=SAMPLE
+
+%struct.A = type { i32, [0 x i32] }
+%struct.B = type { i32, [0 x double] }
+
+; CHECK-LABEL: @foo()
+; CHECK-NEXT:   %1 = alloca %struct.A
+; CHECK-NEXT:   %2 = alloca %struct.B
+; CHECK-NEXT:   call void @llvm.instrprof.increment(ptr @__profn_foo
+
+; SAMPLE: @foo()
+; SAMPLE-NEXT:  %1 = alloca %struct.A
+; SAMPLE-NEXT:  %2 = alloca %struct.B
+; SAMPLE-NEXT:  %[[v:[0-9]+]] = load i16, ptr @__llvm_profile_sampling
+; SAMPLE-NEXT:  {{.*}} = icmp ule i16 %[[v]], 199
+
+define dso_local double @foo() {
+  %1 = alloca %struct.A, align 4
+  %2 = alloca %struct.B, align 8
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1)
+  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
+  call void @bar(ptr noundef nonnull %1, ptr noundef nonnull %2)
+  %3 = load i32, ptr %1, align 4
+  %4 = icmp sgt i32 %3, 0
+  br i1 %4, label %5, label %21
+
+5:
+  %6 = getelementptr inbounds i8, ptr %1, i64 4
+  %7 = getelementptr inbounds i8, ptr %2, i64 8
+  %8 = zext nneg i32 %3 to i64
+  br label %9
+
+9:
+  %10 = phi i64 [ 0, %5 ], [ %19, %9 ]
+  %11 = phi double [ 0.000000e+00, %5 ], [ %18, %9 ]
+  %12 = getelementptr inbounds [0 x i32], ptr %6, i64 0, i64 %10
+  %13 = load i32, ptr %12, align 4
+  %14 = sitofp i32 %13 to double
+  %15 = getelementptr inbounds [0 x double], ptr %7, i64 0, i64 %10
+  %16 = load double, ptr %15, align 8
+  %17 = fadd double %16, %14
+  %18 = fadd double %11, %17
+  %19 = add nuw nsw i64 %10, 1
+  %20 = icmp eq i64 %19, %8
+  br i1 %20, label %21, label %9
+
+21:
+  %22 = phi double [ 0.000000e+00, %0 ], [ %18, %9 ]
+  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1)
+  ret double %22
+}
+
+declare void @bar(ptr noundef, ptr noundef)

diff  --git a/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll b/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll
index 8b92f8ccb51fb..cf022dd7c4e65 100644
--- a/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll
+++ b/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll
@@ -42,6 +42,7 @@ if.end:                                           ; preds = %if.end.preheader, %
 ;; The edge will not be profiled.
 ; CHECK-LABEL: @cannot_split(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %targets = alloca <2 x ptr>, align 16
 ; CHECK-NEXT:    call void @llvm.instrprof.increment
 ; CHECK: indirect:
 ; CHECK-NOT:     call void @llvm.instrprof.increment


        


More information about the llvm-commits mailing list