[llvm] [CodeGen] Add OffloadBlockUniformityAnalysis for offload PGO (PR #178417)
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 28 05:14:53 PST 2026
https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/178417
Add a target-agnostic MachineFunction analysis that propagates per-block
uniformity information from IR metadata to codegen passes.
The analysis reads "offload-block-uniformity" metadata attached to IR
BasicBlock terminators during PGO-use. This metadata is produced by the
offload PGO infrastructure when profile data includes uniformity bits.
SpillPlacement consumes this analysis to flatten block frequencies for
divergent blocks, preventing PGO-guided spill placement from causing
performance regressions on SIMT architectures where "cold" divergent
paths still execute with partial wave occupancy.
Key components:
- OffloadBlockUniformityInfo: Stores per-MBB divergence classification
- OffloadBlockUniformityAnalysis: MachineFunctionAnalysis wrapper
- SpillPlacement integration: Queries analysis for divergent blocks
This is independent of the core offload PGO infrastructure - if no
metadata exists, the analysis reports hasUniformity()=false and
SpillPlacement behaves normally.
Related PR: #177665 (offload PGO infrastructure)
From ffe44a5be52641dda5ede8a25ba5f5060435ddf6 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Tue, 27 Jan 2026 16:42:05 -0500
Subject: [PATCH] [CodeGen] Add OffloadBlockUniformityAnalysis for offload PGO
Add a target-agnostic MachineFunction analysis that propagates per-block
uniformity information from IR metadata to codegen passes.
The analysis reads "offload-block-uniformity" metadata attached to IR
BasicBlock terminators during PGO-use. This metadata is produced by the
offload PGO infrastructure when profile data includes uniformity bits.
SpillPlacement consumes this analysis to flatten block frequencies for
divergent blocks, preventing PGO-guided spill placement from causing
performance regressions on SIMT architectures where "cold" divergent
paths still execute with partial wave occupancy.
Key components:
- OffloadBlockUniformityInfo: Stores per-MBB divergence classification
- OffloadBlockUniformityAnalysis: MachineFunctionAnalysis wrapper
- SpillPlacement integration: Queries analysis for divergent blocks
This is independent of the core offload PGO infrastructure - if no
metadata exists, the analysis reports hasUniformity()=false and
SpillPlacement behaves normally.
---
.../llvm/CodeGen/OffloadBlockUniformity.h | 65 ++++++++++++++
llvm/include/llvm/CodeGen/SpillPlacement.h | 4 +-
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 +
.../llvm/Passes/MachinePassRegistry.def | 2 +
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/OffloadBlockUniformity.cpp | 84 +++++++++++++++++++
llvm/lib/CodeGen/SpillPlacement.cpp | 24 ++++--
llvm/lib/Passes/PassBuilder.cpp | 1 +
8 files changed, 176 insertions(+), 6 deletions(-)
create mode 100644 llvm/include/llvm/CodeGen/OffloadBlockUniformity.h
create mode 100644 llvm/lib/CodeGen/OffloadBlockUniformity.cpp
diff --git a/llvm/include/llvm/CodeGen/OffloadBlockUniformity.h b/llvm/include/llvm/CodeGen/OffloadBlockUniformity.h
new file mode 100644
index 0000000000000..31e57758a23a0
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/OffloadBlockUniformity.h
@@ -0,0 +1,65 @@
+//===- OffloadBlockUniformity.h - Offload block uniformity info -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provide per-(Machine)basic-block uniformity information for offload profiles.
+//
+// The source of truth is IR metadata attached during PGO use:
+// - Metadata name: "offload-block-uniformity"
+// - Payload: i1 (true = uniform, false = divergent)
+//
+// This is intentionally target-agnostic: any offload backend that produces
+// uniformity bits in the profile can attach the same metadata and reuse this
+// analysis in codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_OFFLOADBLOCKUNIFORMITY_H
+#define LLVM_CODEGEN_OFFLOADBLOCKUNIFORMITY_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+
+class OffloadBlockUniformityInfo {
+public:
+ static constexpr StringLiteral MetadataName = "offload-block-uniformity";
+ // Rebuilds the classification for MF, discarding any previous result.
+ LLVM_ABI void compute(const MachineFunction &MF);
+ // True if compute() found the metadata on at least one block of the function.
+ bool hasUniformity() const { return HasAnyUniformity; }
+
+ // Returns true if the block is considered divergent. If uniformity exists for
+ // the function but a block has no explicit annotation, it is treated as
+ // divergent (conservative). Without any uniformity info this returns false.
+ LLVM_ABI bool isDivergent(const MachineBasicBlock &MBB) const;
+
+private:
+ bool HasAnyUniformity = false; // Set by compute(); gates isDivergent().
+ BitVector DivergentBlocks; // Indexed by MachineBasicBlock number.
+};
+
+class OffloadBlockUniformityAnalysis
+ : public AnalysisInfoMixin<OffloadBlockUniformityAnalysis> {
+ friend AnalysisInfoMixin<OffloadBlockUniformityAnalysis>;
+ static AnalysisKey Key; // Defined in OffloadBlockUniformity.cpp.
+ // run() computes a fresh OffloadBlockUniformityInfo for the given function.
+public:
+ using Result = OffloadBlockUniformityInfo;
+ LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_OFFLOADBLOCKUNIFORMITY_H
diff --git a/llvm/include/llvm/CodeGen/SpillPlacement.h b/llvm/include/llvm/CodeGen/SpillPlacement.h
index 1ef37f2718a65..490ebbb236efc 100644
--- a/llvm/include/llvm/CodeGen/SpillPlacement.h
+++ b/llvm/include/llvm/CodeGen/SpillPlacement.h
@@ -39,6 +39,7 @@ class BitVector;
class EdgeBundles;
class MachineBlockFrequencyInfo;
class MachineFunction;
+class OffloadBlockUniformityInfo;
class SpillPlacementWrapperLegacy;
class SpillPlacementAnalysis;
@@ -169,7 +170,8 @@ class SpillPlacement {
void releaseMemory();
void run(MachineFunction &MF, EdgeBundles *Bundles,
- MachineBlockFrequencyInfo *MBFI);
+ MachineBlockFrequencyInfo *MBFI,
+ const OffloadBlockUniformityInfo *Uniformity = nullptr);
void activate(unsigned n);
void setThreshold(BlockFrequency Entry);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6942fc42ca721..7930a58fcc290 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -60,6 +60,7 @@
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/MachineSink.h"
#include "llvm/CodeGen/MachineVerifier.h"
+#include "llvm/CodeGen/OffloadBlockUniformity.h"
#include "llvm/CodeGen/OptimizePHIs.h"
#include "llvm/CodeGen/PEI.h"
#include "llvm/CodeGen/PHIElimination.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 20b066a2ead6d..ed56bd6acd3fc 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -81,6 +81,8 @@ MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree",
MachinePostDominatorTreeAnalysis())
MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis())
MACHINE_FUNCTION_ANALYSIS("machine-uniformity", MachineUniformityAnalysis())
+MACHINE_FUNCTION_ANALYSIS("offload-block-uniformity",
+ OffloadBlockUniformityAnalysis())
MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysis())
MACHINE_FUNCTION_ANALYSIS("regalloc-evict", RegAllocEvictionAdvisorAnalysis())
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f26b2cb6fddf5..b358150569cb3 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -68,6 +68,7 @@ add_llvm_component_library(LLVMCodeGen
FixupStatepointCallerSaved.cpp
FuncletLayout.cpp
MachineFunctionAnalysis.cpp
+ OffloadBlockUniformity.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCRootLowering.cpp
diff --git a/llvm/lib/CodeGen/OffloadBlockUniformity.cpp b/llvm/lib/CodeGen/OffloadBlockUniformity.cpp
new file mode 100644
index 0000000000000..13d772c08d92c
--- /dev/null
+++ b/llvm/lib/CodeGen/OffloadBlockUniformity.cpp
@@ -0,0 +1,84 @@
+//===- OffloadBlockUniformity.cpp - Offload block uniformity info --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/OffloadBlockUniformity.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Metadata.h"
+#include <optional>
+
+using namespace llvm;
+
+// Returns the i1 from !{i1 IsUniform} on BB's terminator; nullopt when it is
+// absent or malformed (this patch adds no verifier check, so do not assert).
+static std::optional<bool> getIRBlockUniformity(const BasicBlock &BB) {
+  const Instruction *TI = BB.getTerminator();
+  const MDNode *MD =
+      TI ? TI->getMetadata(OffloadBlockUniformityInfo::MetadataName) : nullptr;
+  if (!MD || MD->getNumOperands() == 0)
+    return std::nullopt;
+  if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(0)))
+    return CI->isOne();
+  return std::nullopt;
+}
+
+// Rebuilds the classification for MF from terminator metadata. If no block is
+// annotated, HasAnyUniformity stays false and no block is marked divergent.
+void OffloadBlockUniformityInfo::compute(const MachineFunction &MF) {
+  HasAnyUniformity = false;
+  DivergentBlocks.clear();
+  DivergentBlocks.resize(MF.getNumBlockIDs());
+
+  // First determine whether any uniformity annotation exists for this function.
+  for (const MachineBasicBlock &MBB : MF) {
+    const BasicBlock *BB = MBB.getBasicBlock();
+    if (BB && getIRBlockUniformity(*BB).has_value()) {
+      HasAnyUniformity = true;
+      break;
+    }
+  }
+  if (!HasAnyUniformity)
+    return;
+
+  // Conservative behavior: if uniformity exists for the function but we cannot
+  // classify a particular (Machine)basic block, treat it as divergent.
+  for (const MachineBasicBlock &MBB : MF) {
+    bool IsDivergent = true;
+    if (const BasicBlock *BB = MBB.getBasicBlock()) {
+      if (auto U = getIRBlockUniformity(*BB))
+        IsDivergent = !*U;
+    }
+    // BitVector has no set(Idx, bool); set(I, E) sets the range [I, E).
+    const unsigned Num = MBB.getNumber();
+    if (IsDivergent && Num < DivergentBlocks.size())
+      DivergentBlocks.set(Num);
+  }
+}
+
+// Reports whether MBB is classified as divergent by the last compute().
+bool OffloadBlockUniformityInfo::isDivergent(
+    const MachineBasicBlock &MBB) const {
+  if (!HasAnyUniformity)
+    return false;
+  const unsigned Idx = MBB.getNumber();
+  // Blocks outside the computed range are unclassified, hence divergent.
+  return Idx >= DivergentBlocks.size() || DivergentBlocks.test(Idx);
+}
+
+AnalysisKey OffloadBlockUniformityAnalysis::Key;
+
+OffloadBlockUniformityAnalysis::Result
+OffloadBlockUniformityAnalysis::run(MachineFunction &MF,
+                                    MachineFunctionAnalysisManager &) {
+  Result Uniformity; // Derived from MF alone; no other analysis is queried.
+  Uniformity.compute(MF);
+  return Uniformity;
+}
diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp
index 55a96a22a00ec..fd7e9a1d9919c 100644
--- a/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -32,7 +32,10 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/OffloadBlockUniformity.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <algorithm>
@@ -193,7 +196,9 @@ bool SpillPlacementWrapperLegacy::runOnMachineFunction(MachineFunction &MF) {
auto *Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
- Impl.run(MF, Bundles, MBFI);
+ OffloadBlockUniformityInfo Uniformity;
+ Uniformity.compute(MF);
+ Impl.run(MF, Bundles, MBFI, &Uniformity);
return false;
}
@@ -204,8 +209,9 @@ SpillPlacementAnalysis::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
auto *Bundles = &MFAM.getResult<EdgeBundlesAnalysis>(MF);
auto *MBFI = &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
+ auto &Uniformity = MFAM.getResult<OffloadBlockUniformityAnalysis>(MF);
SpillPlacement Impl;
- Impl.run(MF, Bundles, MBFI);
+ Impl.run(MF, Bundles, MBFI, &Uniformity);
return Impl;
}
@@ -217,7 +223,8 @@ bool SpillPlacementAnalysis::Result::invalidate(
return true;
// Check dependencies.
return Inv.invalidate<EdgeBundlesAnalysis>(MF, PA) ||
- Inv.invalidate<MachineBlockFrequencyAnalysis>(MF, PA);
+ Inv.invalidate<MachineBlockFrequencyAnalysis>(MF, PA) ||
+ Inv.invalidate<OffloadBlockUniformityAnalysis>(MF, PA);
}
SpillPlacement::SpillPlacement() = default;
@@ -230,7 +237,8 @@ void SpillPlacement::releaseMemory() {
}
void SpillPlacement::run(MachineFunction &mf, EdgeBundles *Bundles,
- MachineBlockFrequencyInfo *MBFI) {
+ MachineBlockFrequencyInfo *MBFI,
+ const OffloadBlockUniformityInfo *Uniformity) {
MF = &mf;
this->bundles = Bundles;
this->MBFI = MBFI;
@@ -240,12 +248,18 @@ void SpillPlacement::run(MachineFunction &mf, EdgeBundles *Bundles,
TodoList.clear();
TodoList.setUniverse(bundles->getNumBundles());
+ const bool HasUniformity = Uniformity && Uniformity->hasUniformity();
+
// Compute total ingoing and outgoing block frequencies for all bundles.
BlockFrequencies.resize(mf.getNumBlockIDs());
setThreshold(MBFI->getEntryFreq());
for (auto &I : mf) {
unsigned Num = I.getNumber();
- BlockFrequencies[Num] = MBFI->getBlockFreq(&I);
+ if (HasUniformity && Uniformity->isDivergent(I)) {
+ BlockFrequencies[Num] = MBFI->getEntryFreq();
+ } else {
+ BlockFrequencies[Num] = MBFI->getBlockFreq(&I);
+ }
}
}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8bb78c8c7df63..d541062b743d2 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -135,6 +135,7 @@
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/MachineVerifier.h"
+#include "llvm/CodeGen/OffloadBlockUniformity.h"
#include "llvm/CodeGen/OptimizePHIs.h"
#include "llvm/CodeGen/PEI.h"
#include "llvm/CodeGen/PHIElimination.h"
More information about the llvm-commits
mailing list