[llvm] [CodeGen] Add OffloadBlockUniformityAnalysis for offload PGO (PR #178417)

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 28 05:14:53 PST 2026


https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/178417

Add a target-agnostic MachineFunction analysis that propagates per-block
uniformity information from IR metadata to codegen passes.

The analysis reads "offload-block-uniformity" metadata attached to IR
BasicBlock terminators during PGO-use. This metadata is produced by the
offload PGO infrastructure when profile data includes uniformity bits.

SpillPlacement consumes this analysis to flatten block frequencies for
divergent blocks, preventing PGO-guided spill placement from causing
performance regressions on SIMT architectures where "cold" divergent
paths still execute with partial wave occupancy.

Key components:
- OffloadBlockUniformityInfo: Stores per-MBB divergence classification
- OffloadBlockUniformityAnalysis: MachineFunctionAnalysis wrapper
- SpillPlacement integration: Queries analysis for divergent blocks

This is independent of the core offload PGO infrastructure - if no
metadata exists, the analysis reports hasUniformity()=false and
SpillPlacement behaves normally.

Related PR: #177665 (offload PGO infrastructure)

>From ffe44a5be52641dda5ede8a25ba5f5060435ddf6 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Tue, 27 Jan 2026 16:42:05 -0500
Subject: [PATCH] [CodeGen] Add OffloadBlockUniformityAnalysis for offload PGO

Add a target-agnostic MachineFunction analysis that propagates per-block
uniformity information from IR metadata to codegen passes.

The analysis reads "offload-block-uniformity" metadata attached to IR
BasicBlock terminators during PGO-use. This metadata is produced by the
offload PGO infrastructure when profile data includes uniformity bits.

SpillPlacement consumes this analysis to flatten block frequencies for
divergent blocks, preventing PGO-guided spill placement from causing
performance regressions on SIMT architectures where "cold" divergent
paths still execute with partial wave occupancy.

Key components:
- OffloadBlockUniformityInfo: Stores per-MBB divergence classification
- OffloadBlockUniformityAnalysis: MachineFunctionAnalysis wrapper
- SpillPlacement integration: Queries analysis for divergent blocks

This is independent of the core offload PGO infrastructure - if no
metadata exists, the analysis reports hasUniformity()=false and
SpillPlacement behaves normally.
---
 .../llvm/CodeGen/OffloadBlockUniformity.h     | 65 ++++++++++++++
 llvm/include/llvm/CodeGen/SpillPlacement.h    |  4 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def       |  2 +
 llvm/lib/CodeGen/CMakeLists.txt               |  1 +
 llvm/lib/CodeGen/OffloadBlockUniformity.cpp   | 84 +++++++++++++++++++
 llvm/lib/CodeGen/SpillPlacement.cpp           | 24 ++++--
 llvm/lib/Passes/PassBuilder.cpp               |  1 +
 8 files changed, 176 insertions(+), 6 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/OffloadBlockUniformity.h
 create mode 100644 llvm/lib/CodeGen/OffloadBlockUniformity.cpp

diff --git a/llvm/include/llvm/CodeGen/OffloadBlockUniformity.h b/llvm/include/llvm/CodeGen/OffloadBlockUniformity.h
new file mode 100644
index 0000000000000..31e57758a23a0
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/OffloadBlockUniformity.h
@@ -0,0 +1,65 @@
+//===- OffloadBlockUniformity.h - Offload block uniformity info -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provide per-(Machine)basic-block uniformity information for offload profiles.
+//
+// The source of truth is IR metadata attached during PGO use:
+//   - Metadata name: "offload-block-uniformity"
+//   - Payload: i1 (true = uniform, false = divergent)
+//
+// This is intentionally target-agnostic: any offload backend that produces
+// uniformity bits in the profile can attach the same metadata and reuse this
+// analysis in codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_OFFLOADBLOCKUNIFORMITY_H
+#define LLVM_CODEGEN_OFFLOADBLOCKUNIFORMITY_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+
+class OffloadBlockUniformityInfo {
+public:
+  static constexpr StringLiteral MetadataName = "offload-block-uniformity";
+
+  LLVM_ABI void compute(const MachineFunction &MF); // Rebuilds state from MF.
+
+  bool hasUniformity() const { return HasAnyUniformity; }
+
+  // Returns true if the block is considered divergent. Always false when
+  // hasUniformity() is false; otherwise a block with no explicit annotation is
+  // treated as divergent (conservative).
+  LLVM_ABI bool isDivergent(const MachineBasicBlock &MBB) const;
+
+private:
+  bool HasAnyUniformity = false; // Set by compute() if any block is annotated.
+  BitVector DivergentBlocks;     // Indexed by MBB number; set bit = divergent.
+};
+
+class OffloadBlockUniformityAnalysis
+    : public AnalysisInfoMixin<OffloadBlockUniformityAnalysis> {
+  friend AnalysisInfoMixin<OffloadBlockUniformityAnalysis>;
+  static AnalysisKey Key;
+
+public:
+  using Result = OffloadBlockUniformityInfo; // Filled in by run() via compute().
+  LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_OFFLOADBLOCKUNIFORMITY_H
diff --git a/llvm/include/llvm/CodeGen/SpillPlacement.h b/llvm/include/llvm/CodeGen/SpillPlacement.h
index 1ef37f2718a65..490ebbb236efc 100644
--- a/llvm/include/llvm/CodeGen/SpillPlacement.h
+++ b/llvm/include/llvm/CodeGen/SpillPlacement.h
@@ -39,6 +39,7 @@ class BitVector;
 class EdgeBundles;
 class MachineBlockFrequencyInfo;
 class MachineFunction;
+class OffloadBlockUniformityInfo;
 class SpillPlacementWrapperLegacy;
 class SpillPlacementAnalysis;
 
@@ -169,7 +170,8 @@ class SpillPlacement {
   void releaseMemory();
 
   void run(MachineFunction &MF, EdgeBundles *Bundles,
-           MachineBlockFrequencyInfo *MBFI);
+           MachineBlockFrequencyInfo *MBFI,
+           const OffloadBlockUniformityInfo *Uniformity = nullptr);
   void activate(unsigned n);
   void setThreshold(BlockFrequency Entry);
 
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6942fc42ca721..7930a58fcc290 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -60,6 +60,7 @@
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/MachineSink.h"
 #include "llvm/CodeGen/MachineVerifier.h"
+#include "llvm/CodeGen/OffloadBlockUniformity.h"
 #include "llvm/CodeGen/OptimizePHIs.h"
 #include "llvm/CodeGen/PEI.h"
 #include "llvm/CodeGen/PHIElimination.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 20b066a2ead6d..ed56bd6acd3fc 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -81,6 +81,8 @@ MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree",
                           MachinePostDominatorTreeAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-uniformity", MachineUniformityAnalysis())
+MACHINE_FUNCTION_ANALYSIS("offload-block-uniformity",
+                          OffloadBlockUniformityAnalysis())
 MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
 MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysis())
 MACHINE_FUNCTION_ANALYSIS("regalloc-evict", RegAllocEvictionAdvisorAnalysis())
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f26b2cb6fddf5..b358150569cb3 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -68,6 +68,7 @@ add_llvm_component_library(LLVMCodeGen
   FixupStatepointCallerSaved.cpp
   FuncletLayout.cpp
   MachineFunctionAnalysis.cpp
+  OffloadBlockUniformity.cpp
   GCMetadata.cpp
   GCMetadataPrinter.cpp
   GCRootLowering.cpp
diff --git a/llvm/lib/CodeGen/OffloadBlockUniformity.cpp b/llvm/lib/CodeGen/OffloadBlockUniformity.cpp
new file mode 100644
index 0000000000000..13d772c08d92c
--- /dev/null
+++ b/llvm/lib/CodeGen/OffloadBlockUniformity.cpp
@@ -0,0 +1,84 @@
+//===- OffloadBlockUniformity.cpp - Offload block uniformity info --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/OffloadBlockUniformity.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Metadata.h"
+#include <optional>
+
+using namespace llvm;
+
+// Reads the !{i1 IsUniform} annotation on BB's terminator. Returns nullopt if
+// the block has no terminator or the node is absent, empty, or malformed.
+static std::optional<bool> getIRBlockUniformity(const BasicBlock &BB) {
+  const Instruction *TI = BB.getTerminator();
+  if (!TI)
+    return std::nullopt;
+  const MDNode *MD = TI->getMetadata(OffloadBlockUniformityInfo::MetadataName);
+  if (!MD || MD->getNumOperands() == 0)
+    return std::nullopt;
+  const auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(0));
+  return CI ? std::optional<bool>(CI->isOne()) : std::nullopt;
+}
+
+// Recomputes HasAnyUniformity and DivergentBlocks for MF from scratch.
+void OffloadBlockUniformityInfo::compute(const MachineFunction &MF) {
+  HasAnyUniformity = false;
+  DivergentBlocks.clear();
+  DivergentBlocks.resize(MF.getNumBlockIDs());
+
+  // First determine whether any uniformity annotation exists for this function.
+  for (const MachineBasicBlock &MBB : MF) {
+    const BasicBlock *BB = MBB.getBasicBlock();
+    if (BB && getIRBlockUniformity(*BB).has_value()) {
+      HasAnyUniformity = true;
+      break;
+    }
+  }
+  if (!HasAnyUniformity)
+    return;
+
+  // Conservative behavior: if uniformity exists for the function but we cannot
+  // classify a particular (Machine)basic block, treat it as divergent.
+  for (const MachineBasicBlock &MBB : MF) {
+    bool IsDivergent = true;
+    if (const BasicBlock *BB = MBB.getBasicBlock()) {
+      if (auto U = getIRBlockUniformity(*BB))
+        IsDivergent = !*U;
+    }
+    // Bits start out clear, so only divergent blocks need an update. Do not
+    // pass a bool here: BitVector::set(I, E) sets the half-open range [I, E).
+    const unsigned Num = MBB.getNumber();
+    if (IsDivergent && Num < DivergentBlocks.size())
+      DivergentBlocks.set(Num);
+  }
+}
+
+// Reports whether MBB should be treated as divergent.
+bool OffloadBlockUniformityInfo::isDivergent(
+    const MachineBasicBlock &MBB) const {
+  if (!HasAnyUniformity)
+    return false;
+  // Block numbers past the recorded range are conservatively divergent.
+  const unsigned Idx = MBB.getNumber();
+  return Idx >= DivergentBlocks.size() || DivergentBlocks.test(Idx);
+}
+
+AnalysisKey OffloadBlockUniformityAnalysis::Key;
+
+OffloadBlockUniformityAnalysis::Result
+OffloadBlockUniformityAnalysis::run(MachineFunction &MF,
+                                    MachineFunctionAnalysisManager &) {
+  Result Uniformity; // Derived from MF alone; the manager is not consulted.
+  Uniformity.compute(MF);
+  return Uniformity;
+}
diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp
index 55a96a22a00ec..fd7e9a1d9919c 100644
--- a/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -32,7 +32,10 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/OffloadBlockUniformity.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include <algorithm>
@@ -193,7 +196,9 @@ bool SpillPlacementWrapperLegacy::runOnMachineFunction(MachineFunction &MF) {
   auto *Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
   auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
 
-  Impl.run(MF, Bundles, MBFI);
+  OffloadBlockUniformityInfo Uniformity;
+  Uniformity.compute(MF);
+  Impl.run(MF, Bundles, MBFI, &Uniformity);
   return false;
 }
 
@@ -204,8 +209,9 @@ SpillPlacementAnalysis::run(MachineFunction &MF,
                             MachineFunctionAnalysisManager &MFAM) {
   auto *Bundles = &MFAM.getResult<EdgeBundlesAnalysis>(MF);
   auto *MBFI = &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
+  auto &Uniformity = MFAM.getResult<OffloadBlockUniformityAnalysis>(MF);
   SpillPlacement Impl;
-  Impl.run(MF, Bundles, MBFI);
+  Impl.run(MF, Bundles, MBFI, &Uniformity);
   return Impl;
 }
 
@@ -217,7 +223,8 @@ bool SpillPlacementAnalysis::Result::invalidate(
     return true;
   // Check dependencies.
   return Inv.invalidate<EdgeBundlesAnalysis>(MF, PA) ||
-         Inv.invalidate<MachineBlockFrequencyAnalysis>(MF, PA);
+         Inv.invalidate<MachineBlockFrequencyAnalysis>(MF, PA) ||
+         Inv.invalidate<OffloadBlockUniformityAnalysis>(MF, PA);
 }
 
 SpillPlacement::SpillPlacement() = default;
@@ -230,7 +237,8 @@ void SpillPlacement::releaseMemory() {
 }
 
 void SpillPlacement::run(MachineFunction &mf, EdgeBundles *Bundles,
-                         MachineBlockFrequencyInfo *MBFI) {
+                         MachineBlockFrequencyInfo *MBFI,
+                         const OffloadBlockUniformityInfo *Uniformity) {
   MF = &mf;
   this->bundles = Bundles;
   this->MBFI = MBFI;
@@ -240,12 +248,18 @@ void SpillPlacement::run(MachineFunction &mf, EdgeBundles *Bundles,
   TodoList.clear();
   TodoList.setUniverse(bundles->getNumBundles());
 
+  const bool HasUniformity = Uniformity && Uniformity->hasUniformity();
+
   // Compute total ingoing and outgoing block frequencies for all bundles.
   BlockFrequencies.resize(mf.getNumBlockIDs());
   setThreshold(MBFI->getEntryFreq());
   for (auto &I : mf) {
     unsigned Num = I.getNumber();
-    BlockFrequencies[Num] = MBFI->getBlockFreq(&I);
+    if (HasUniformity && Uniformity->isDivergent(I)) {
+      BlockFrequencies[Num] = MBFI->getEntryFreq();
+    } else {
+      BlockFrequencies[Num] = MBFI->getBlockFreq(&I);
+    }
   }
 }
 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8bb78c8c7df63..d541062b743d2 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -135,6 +135,7 @@
 #include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/MachineVerifier.h"
+#include "llvm/CodeGen/OffloadBlockUniformity.h"
 #include "llvm/CodeGen/OptimizePHIs.h"
 #include "llvm/CodeGen/PEI.h"
 #include "llvm/CodeGen/PHIElimination.h"



More information about the llvm-commits mailing list