[llvm] [PartiallyInlineLibCalls] Add infrastructure for emitting optimization remarks from PartiallyInlineLibCalls (PR #122654)

Sun Jan 12 15:25:52 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: None (TiborGY)

<details>
<summary>Changes</summary>

**PR 1/2**
I am planning to add some optimization remarks to the `PartiallyInlineLibCalls` pass.
However, since this pass does not emit any optimization remarks yet, I have to add the "infrastructure" for that first, which is what this PR is about.

**Questions/points of uncertainty for reviewers:**

1. I am getting the pointer for the `OptimizationRemarkEmitter` object from the `OptimizationRemarkEmitter` analysis pass. This seems to be used in other optimization passes for this purpose, but I cannot claim that I understand it.
2. It looks like getting an `OptimizationRemarkEmitter` object brings in `Lazy Branch Probability Analysis` and `Lazy Block Frequency Analysis` as a dependency. I had to change some optimization pass ordering tests that were failing due to this. Please confirm that this is an inconsequential change.

---
Full diff: https://github.com/llvm/llvm-project/pull/122654.diff


8 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp (+12-5) 
- (modified) llvm/test/CodeGen/AArch64/O3-pipeline.ll (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+12) 
- (modified) llvm/test/CodeGen/ARM/O3-pipeline.ll (+3) 
- (modified) llvm/test/CodeGen/LoongArch/opt-pipeline.ll (+3) 
- (modified) llvm/test/CodeGen/PowerPC/O3-pipeline.ll (+3) 
- (modified) llvm/test/CodeGen/RISCV/O3-pipeline.ll (+3) 
- (modified) llvm/test/CodeGen/X86/opt-pipeline.ll (+3) 


``````````diff

diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 3a699df1cde4df..97339308cb0e1b 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/DebugCounter.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include <optional>
 
 using namespace llvm;
@@ -33,7 +34,8 @@ DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
 
 static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
                          BasicBlock &CurrBB, Function::iterator &BB,
-                         const TargetTransformInfo *TTI, DomTreeUpdater *DTU) {
+                         const TargetTransformInfo *TTI, DomTreeUpdater *DTU,
+                         OptimizationRemarkEmitter *ORE) {
   // There is no need to change the IR, since backend will emit sqrt
   // instruction if the call has already been marked read-only.
   if (Call->onlyReadsMemory())
@@ -103,7 +105,8 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
 
 static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
                                        const TargetTransformInfo *TTI,
-                                       DominatorTree *DT) {
+                                       DominatorTree *DT,
+                                       OptimizationRemarkEmitter *ORE) {
   std::optional<DomTreeUpdater> DTU;
   if (DT)
     DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
@@ -140,7 +143,7 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
       case LibFunc_sqrt:
         if (TTI->haveFastSqrt(Call->getType()) &&
             optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
-                         DTU ? &*DTU : nullptr))
+                         DTU ? &*DTU : nullptr, ORE))
           break;
         continue;
       default:
@@ -160,7 +163,8 @@ PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) {
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
   auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
-  if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT))
+  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT, &ORE))
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
   PA.preserve<DominatorTreeAnalysis>();
@@ -181,6 +185,7 @@ class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
     FunctionPass::getAnalysisUsage(AU);
   }
 
@@ -195,7 +200,8 @@ class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
     DominatorTree *DT = nullptr;
     if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
       DT = &DTWP->getDomTree();
-    return runPartiallyInlineLibCalls(F, TLI, TTI, DT);
+    auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+    return runPartiallyInlineLibCalls(F, TLI, TTI, DT, ORE);
   }
 };
 }
@@ -208,6 +214,7 @@ INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
 INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
                     "partially-inline-libcalls",
                     "Partially inline calls to library functions", false, false)
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index b5d5e27afa17ad..49a86134411d62 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -59,6 +59,9 @@
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
 ; CHECK-NEXT:       Replace intrinsics with calls to vector library
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index b2708cf13cbf32..a7cc4cfc6707fb 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -224,6 +224,9 @@
 ; GCN-O1-NEXT:      Block Frequency Analysis
 ; GCN-O1-NEXT:      Constant Hoisting
 ; GCN-O1-NEXT:      Replace intrinsics with calls to vector library
+; GCN-O1-NEXT:      Lazy Branch Probability Analysis
+; GCN-O1-NEXT:      Lazy Block Frequency Analysis
+; GCN-O1-NEXT:      Optimization Remark Emitter
 ; GCN-O1-NEXT:      Partially inline calls to library functions
 ; GCN-O1-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; GCN-O1-NEXT:      Scalarize Masked Memory Intrinsics
@@ -518,6 +521,9 @@
 ; GCN-O1-OPTS-NEXT:      Block Frequency Analysis
 ; GCN-O1-OPTS-NEXT:      Constant Hoisting
 ; GCN-O1-OPTS-NEXT:      Replace intrinsics with calls to vector library
+; GCN-O1-OPTS-NEXT:      Lazy Branch Probability Analysis
+; GCN-O1-OPTS-NEXT:      Lazy Block Frequency Analysis
+; GCN-O1-OPTS-NEXT:      Optimization Remark Emitter
 ; GCN-O1-OPTS-NEXT:      Partially inline calls to library functions
 ; GCN-O1-OPTS-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; GCN-O1-OPTS-NEXT:      Scalarize Masked Memory Intrinsics
@@ -831,6 +837,9 @@
 ; GCN-O2-NEXT:      Block Frequency Analysis
 ; GCN-O2-NEXT:      Constant Hoisting
 ; GCN-O2-NEXT:      Replace intrinsics with calls to vector library
+; GCN-O2-NEXT:      Lazy Branch Probability Analysis
+; GCN-O2-NEXT:      Lazy Block Frequency Analysis
+; GCN-O2-NEXT:      Optimization Remark Emitter
 ; GCN-O2-NEXT:      Partially inline calls to library functions
 ; GCN-O2-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; GCN-O2-NEXT:      Scalarize Masked Memory Intrinsics
@@ -1152,6 +1161,9 @@
 ; GCN-O3-NEXT:      Block Frequency Analysis
 ; GCN-O3-NEXT:      Constant Hoisting
 ; GCN-O3-NEXT:      Replace intrinsics with calls to vector library
+; GCN-O3-NEXT:      Lazy Branch Probability Analysis
+; GCN-O3-NEXT:      Lazy Block Frequency Analysis
+; GCN-O3-NEXT:      Optimization Remark Emitter
 ; GCN-O3-NEXT:      Partially inline calls to library functions
 ; GCN-O3-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; GCN-O3-NEXT:      Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index f6822713022a9c..1840b5ce46c6f6 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -37,6 +37,9 @@
 ; CHECK-NEXT:      Block Frequency Analysis
 ; CHECK-NEXT:      Constant Hoisting
 ; CHECK-NEXT:      Replace intrinsics with calls to vector library
+; CHECK-NEXT:      Lazy Branch Probability Analysis
+; CHECK-NEXT:      Lazy Block Frequency Analysis
+; CHECK-NEXT:      Optimization Remark Emitter
 ; CHECK-NEXT:      Partially inline calls to library functions
 ; CHECK-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:      Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index da26e9846301a1..ab76d4e998d2b9 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -60,6 +60,9 @@
 ; LAXX-NEXT:       Block Frequency Analysis
 ; LAXX-NEXT:       Constant Hoisting
 ; LAXX-NEXT:       Replace intrinsics with calls to vector library
+; LAXX-NEXT:       Lazy Branch Probability Analysis
+; LAXX-NEXT:       Lazy Block Frequency Analysis
+; LAXX-NEXT:       Optimization Remark Emitter
 ; LAXX-NEXT:       Partially inline calls to library functions
 ; LAXX-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; LAXX-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index 64325ec772459d..3920d75c83ffe3 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -61,6 +61,9 @@
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
 ; CHECK-NEXT:       Replace intrinsics with calls to vector library
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index b0c756e26985bb..668c7346124472 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -63,6 +63,9 @@
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
 ; CHECK-NEXT:       Replace intrinsics with calls to vector library
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 1bca1b960edda4..203be56751d092 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -58,6 +58,9 @@
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
 ; CHECK-NEXT:       Replace intrinsics with calls to vector library
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics

``````````

</details>


https://github.com/llvm/llvm-project/pull/122654