[Mlir-commits] [mlir] [mlir][ROCDL] Plumb through AMDGPU memory access metadata (PR #110916)
Krzysztof Drewniak
llvmlistbot at llvm.org
Wed Oct 2 12:11:00 PDT 2024
https://github.com/krzysz00 created https://github.com/llvm/llvm-project/pull/110916
The LLVM backend has moved from function-wide attributes for making assurances about potentially unsafe atomic operations (like "unsafe-fp-atomics") to metadata on individual atomic operations.
This commit adds support for generating this metadata from MLIR.
From c10e5a57dea9ea360e10f21bae5fc42c229c8da5 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Wed, 2 Oct 2024 19:09:01 +0000
Subject: [PATCH] [mlir][ROCDL] Plumb through AMDGPU memory access metadata
The LLVM backend has moved from function-wide attributes for making
assurances about potentially unsafe atomic operations (like
"unsafe-fp-atomics") to metadata on individual atomic operations.
This commit adds support for generating this metadata from MLIR.
---
.../mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 1 +
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 9 +++++--
.../ROCDL/ROCDLToLLVMIRTranslation.cpp | 27 ++++++++++++++++++-
mlir/test/Target/LLVMIR/rocdl.mlir | 23 ++++++++++++++++
4 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index 2da45eba77655b..fae2fe9cc3f8d6 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -1055,6 +1055,7 @@ def LLVM_ConstantRangeAttr : LLVM_Attr<"ConstantRange", "constant_range"> {
Syntax:
```
`<` `i`(width($lower)) $lower `,` $upper `>`
+ ```
}];
let builders = [
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index aae2cf88ded041..1d515b2b7c801c 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -58,7 +58,12 @@ def ROCDL_Dialect : Dialect {
"::mlir::StringAttr":$flat_work_group_size,
"::mlir::IntegerAttr":$max_flat_work_group_size,
"::mlir::IntegerAttr":$waves_per_eu,
- "::mlir::BoolAttr":$unsafe_fp_atomics
+ "::mlir::BoolAttr":$unsafe_fp_atomics,
+ // Correspond to LLVM metadata of the same name
+ "::mlir::UnitAttr":$last_use,
+ "::mlir::UnitAttr":$no_remote_memory,
+ "::mlir::UnitAttr":$no_fine_grained_memory,
+ "::mlir::UnitAttr":$ignore_denormal_mode
);
let useDefaultAttributePrinterParser = 1;
@@ -88,7 +93,7 @@ class ROCDL_IntrPure1Op<string mnemonic> :
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
list<int> overloadedOperands, list<Trait> traits, int numResults,
- int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
+ int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
list<string> immArgAttrNames = []> :
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
index ec21fbf714c24a..88a9d4c2a7ef23 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
@@ -77,6 +77,7 @@ class ROCDLDialectLLVMIRTranslationInterface
NamedAttribute attribute,
LLVM::ModuleTranslation &moduleTranslation) const final {
auto *dialect = dyn_cast<ROCDL::ROCDLDialect>(attribute.getNameDialect());
+ llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
if (dialect->getKernelAttrHelper().getName() == attribute.getName()) {
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
if (!func)
@@ -198,7 +199,6 @@ class ROCDLDialectLLVMIRTranslationInterface
if (!value)
return op->emitOpError(Twine(attribute.getName()) +
" must be a dense i32 array attribute");
- llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
SmallVector<llvm::Metadata *, 3> metadata;
llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
for (int32_t i : value.asArrayRef()) {
@@ -210,6 +210,31 @@ class ROCDLDialectLLVMIRTranslationInterface
llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
llvmFunc->setMetadata("reqd_work_group_size", node);
}
+
+ // Atomic and nontemporal metadata
+ if (dialect->getLastUseAttrHelper().getName() == attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.last.use", llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getNoRemoteMemoryAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.no.remote.memory",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getNoFineGrainedMemoryAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.no.fine.grained.memory",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getIgnoreDenormalModeAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.ignore.denormal.mode",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+
return success();
}
};
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 08c2d4e6477970..97276b087b7e93 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -564,11 +564,34 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
}
llvm.func @rocdl_16bit_packed_floats(%sourceA: f32, %sourceB: f32) -> vector<2xf16> {
+ // CHECK-LABEL: @rocdl_16bit_packed_floats
// CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float {{.*}}, float {{.*}})
%source = rocdl.cvt.pkrtz %sourceA, %sourceB : vector<2xf16>
llvm.return %source : vector<2xf16>
}
+llvm.func @rocdl_atomic_attrs(%ptr: !llvm.ptr<1>, %data: f32) {
+ // CHECK-LABEL: @rocdl_atomic_attrs
+ // CHECK: atomicrmw
+ // CHECK-SAME: !amdgpu.ignore.denormal.mode
+ // CHECK-SAME: !amdgpu.no.fine.grained.memory
+ // CHECK-SAME: !amdgpu.no.remote.memory
+ llvm.atomicrmw fadd %ptr, %data monotonic {
+ rocdl.ignore_denormal_mode,
+ rocdl.no_fine_grained_memory,
+ rocdl.no_remote_memory} : !llvm.ptr<1>, f32
+ llvm.return
+}
+
+llvm.func @rocdl_last_use(%ptr: !llvm.ptr<1>) -> i32 {
+ // CHECK-LABEL: @rocdl_last_use
+ // CHECK: %[[ret:.+]] = load
+ // CHECK-SAME: !amdgpu.last.use
+ // CHECK: ret i32 %[[ret]]
+ %ret = llvm.load %ptr {rocdl.last_use} : !llvm.ptr<1> -> i32
+ llvm.return %ret : i32
+}
+
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
More information about the Mlir-commits
mailing list