[Mlir-commits] [mlir] [mlir][GPU] Generalize gpu.printf to not need gpu.module (PR #161266)
Krzysztof Drewniak
llvmlistbot at llvm.org
Mon Sep 29 12:58:24 PDT 2025
https://github.com/krzysz00 created https://github.com/llvm/llvm-project/pull/161266
In order to make the gpu.printf => [various LLVM calls] passes less order-dependent and to allow downstreams that don't use gpu.module to use gpu.printf, allow the lowerings for such prints to target the nearest `builtin.module` if a `gpu.module` cannot be found.
>From b4d6033a4cff3740eee80a11b3b3200dca896504 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Mon, 29 Sep 2025 19:56:48 +0000
Subject: [PATCH] [mlir][GPU] Generalize gpu.printf to not need gpu.module
In order to make the gpu.printf => [various LLVM calls] passes less
order-dependent and to allow downstreams that don't use gpu.module
to use gpu.printf, allow the lowerings for such prints to target
the nearest `builtin.module` if a `gpu.module` cannot be found.
---
.../Conversion/GPUCommon/GPUOpsLowering.cpp | 58 +++++++++++++------
.../lib/Conversion/GPUCommon/GPUOpsLowering.h | 4 +-
.../GPUToROCDL/gpu-to-rocdl-hip.mlir | 1 +
3 files changed, 43 insertions(+), 20 deletions(-)
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index a73afbcb6474b..78bdbbfc61836 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -20,20 +20,20 @@
using namespace mlir;
-LLVM::LLVMFuncOp mlir::getOrDefineFunction(gpu::GPUModuleOp moduleOp,
- Location loc, OpBuilder &b,
- StringRef name,
+LLVM::LLVMFuncOp mlir::getOrDefineFunction(Operation *moduleOp, Location loc,
+ OpBuilder &b, StringRef name,
LLVM::LLVMFunctionType type) {
LLVM::LLVMFuncOp ret;
- if (!(ret = moduleOp.template lookupSymbol<LLVM::LLVMFuncOp>(name))) {
+ if (!(ret = dyn_cast_or_null<LLVM::LLVMFuncOp>(
+ SymbolTable::lookupSymbolIn(moduleOp, name)))) {
OpBuilder::InsertionGuard guard(b);
- b.setInsertionPointToStart(moduleOp.getBody());
+ b.setInsertionPointToStart(&moduleOp->getRegion(0).front());
ret = LLVM::LLVMFuncOp::create(b, loc, name, type, LLVM::Linkage::External);
}
return ret;
}
-static SmallString<16> getUniqueSymbolName(gpu::GPUModuleOp moduleOp,
+static SmallString<16> getUniqueSymbolName(Operation *moduleOp,
StringRef prefix) {
// Get a unique global name.
unsigned stringNumber = 0;
@@ -41,15 +41,16 @@ static SmallString<16> getUniqueSymbolName(gpu::GPUModuleOp moduleOp,
do {
stringConstName.clear();
(prefix + Twine(stringNumber++)).toStringRef(stringConstName);
- } while (moduleOp.lookupSymbol(stringConstName));
+ } while (SymbolTable::lookupSymbolIn(moduleOp, stringConstName));
return stringConstName;
}
-LLVM::GlobalOp
-mlir::getOrCreateStringConstant(OpBuilder &b, Location loc,
- gpu::GPUModuleOp moduleOp, Type llvmI8,
- StringRef namePrefix, StringRef str,
- uint64_t alignment, unsigned addrSpace) {
+LLVM::GlobalOp mlir::getOrCreateStringConstant(OpBuilder &b, Location loc,
+ Operation *moduleOp, Type llvmI8,
+ StringRef namePrefix,
+ StringRef str,
+ uint64_t alignment,
+ unsigned addrSpace) {
llvm::SmallString<20> nullTermStr(str);
nullTermStr.push_back('\0'); // Null terminate for C
auto globalType =
@@ -57,7 +58,7 @@ mlir::getOrCreateStringConstant(OpBuilder &b, Location loc,
StringAttr attr = b.getStringAttr(nullTermStr);
// Try to find existing global.
- for (auto globalOp : moduleOp.getOps<LLVM::GlobalOp>())
+ for (auto globalOp : moduleOp->getRegion(0).getOps<LLVM::GlobalOp>())
if (globalOp.getGlobalType() == globalType && globalOp.getConstant() &&
globalOp.getValueAttr() == attr &&
globalOp.getAlignment().value_or(0) == alignment &&
@@ -66,7 +67,7 @@ mlir::getOrCreateStringConstant(OpBuilder &b, Location loc,
// Not found: create new global.
OpBuilder::InsertionGuard guard(b);
- b.setInsertionPointToStart(moduleOp.getBody());
+ b.setInsertionPointToStart(&moduleOp->getRegion(0).front());
SmallString<16> name = getUniqueSymbolName(moduleOp, namePrefix);
return LLVM::GlobalOp::create(b, loc, globalType,
/*isConstant=*/true, LLVM::Linkage::Internal,
@@ -398,8 +399,15 @@ LogicalResult GPUPrintfOpToHIPLowering::matchAndRewrite(
mlir::Type llvmI64 = typeConverter->convertType(rewriter.getI64Type());
// Note: this is the GPUModule op, not the ModuleOp that surrounds it
// This ensures that global constants and declarations are placed within
- // the device code, not the host code
- auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
+ // the device code, not the host code.
+ Operation *moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
+ // However, if the `gpu.module` is already lowered or for compilers that don't
+ // use `gpu.module`, fall back to `builtin.module`.
+ if (!moduleOp)
+ moduleOp = gpuPrintfOp->getParentOfType<ModuleOp>();
+ if (!moduleOp)
+ return rewriter.notifyMatchFailure(gpuPrintfOp,
+ "Couldn't find a parent module");
auto ocklBegin =
getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
@@ -499,7 +507,14 @@ LogicalResult GPUPrintfOpToLLVMCallLowering::matchAndRewrite(
// Note: this is the GPUModule op, not the ModuleOp that surrounds it
// This ensures that global constants and declarations are placed within
// the device code, not the host code
- auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
+ Operation *moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
+ // However, if the `gpu.module` is already lowered or for compilers that don't
+ // use `gpu.module`, fall back to `builtin.module`.
+ if (!moduleOp)
+ moduleOp = gpuPrintfOp->getParentOfType<ModuleOp>();
+ if (!moduleOp)
+ return rewriter.notifyMatchFailure(gpuPrintfOp,
+ "Couldn't find a parent module");
auto printfType =
LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {ptrType},
@@ -544,7 +559,14 @@ LogicalResult GPUPrintfOpToVPrintfLowering::matchAndRewrite(
// Note: this is the GPUModule op, not the ModuleOp that surrounds it
// This ensures that global constants and declarations are placed within
// the device code, not the host code
- auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
+ Operation *moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
+ // However, if the `gpu.module` is already lowered or for compilers that don't
+ // use `gpu.module`, fall back to `builtin.module`.
+ if (!moduleOp)
+ moduleOp = gpuPrintfOp->getParentOfType<ModuleOp>();
+ if (!moduleOp)
+ return rewriter.notifyMatchFailure(gpuPrintfOp,
+ "Couldn't find a parent module");
// Create a valid global location removing any metadata attached to the
// location as debug info metadata inside of a function cannot be used outside
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
index e17b06379988c..5eceb96e5234b 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
@@ -19,14 +19,14 @@ namespace mlir {
//===----------------------------------------------------------------------===//
/// Find or create an external function declaration in the given module.
-LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
+LLVM::LLVMFuncOp getOrDefineFunction(Operation *moduleOp, Location loc,
OpBuilder &b, StringRef name,
LLVM::LLVMFunctionType type);
/// Create a global that contains the given string. If a global with the same
/// string already exists in the module, return that global.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
- gpu::GPUModuleOp moduleOp, Type llvmI8,
+ Operation *moduleOp, Type llvmI8,
StringRef namePrefix, StringRef str,
uint64_t alignment = 0,
unsigned addrSpace = 0);
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
index 2dc6a5ab2a86c..e23e4eb5ed3f2 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
@@ -1,5 +1,6 @@
// RUN: mlir-opt %s -convert-gpu-to-rocdl='runtime=HIP' -split-input-file | FileCheck %s
+// CHECK-LABEL: gpu.module @test_module
gpu.module @test_module {
// CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL0:[A-Za-z0-9_]+]]("Hello, world\0A\00")
// CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL1:[A-Za-z0-9_]+]]("Hello: %d\0A\00")
More information about the Mlir-commits
mailing list