[Mlir-commits] [mlir] 016e1eb - [mlir][gpu] Add metadata attributes for storing kernel metadata in GPU objects (#95292)
llvmlistbot at llvm.org
Tue Aug 27 15:44:53 PDT 2024
Author: Fabian Mora
Date: 2024-08-27T18:44:50-04:00
New Revision: 016e1eb9c86923bf6a9669697f6be8309d12b78c
URL: https://github.com/llvm/llvm-project/commit/016e1eb9c86923bf6a9669697f6be8309d12b78c
DIFF: https://github.com/llvm/llvm-project/commit/016e1eb9c86923bf6a9669697f6be8309d12b78c.diff
LOG: [mlir][gpu] Add metadata attributes for storing kernel metadata in GPU objects (#95292)
This patch adds the `#gpu.kernel_metadata` and `#gpu.kernel_table`
attributes. The `#gpu.kernel_metadata` attribute allows storing metadata
related to a compiled kernel, for example, the number of scalar
registers used by the kernel. The attribute has two required
parameters, the kernel name and function type, and two optional
parameters: the argument attributes and a generic dictionary for
storing all other metadata.
The `#gpu.kernel_table` attribute stores a table of `#gpu.kernel_metadata`
entries, mapping each kernel name to its metadata.
Finally, the function `ROCDL::getAMDHSAKernelsELFMetadata` was added to
collect ELF metadata from a binary; it is also used to test the class
methods of both attributes.
Example:
```mlir
gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">, kernels = #gpu.kernel_table<[
#gpu.kernel_metadata<"kernel0", (i32) -> (), metadata = {sgpr_count = 255}>,
#gpu.kernel_metadata<"kernel1", (i32, f32) -> (), arg_attrs = [{llvm.read_only}, {}]>
]> , bin = "BLOB">]
```
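As a rough sketch (not part of this patch), the new API could be consumed
from C++ roughly as follows. The helper name `dumpKernelInfo` and the
`"kernel0"` lookup are illustrative only; `ROCDL::getKernelMetadata`,
`KernelTableAttr::lookup`, and `KernelMetadataAttr::getAttr` follow the
declarations added in this commit:
```cpp
// Illustrative sketch: query the #gpu.kernel_table produced for a serialized
// gpu.module. `gpuModule` and `elfData` are assumed to come from a ROCDL
// serialization pipeline like the one exercised in SerializeROCDLTarget.cpp.
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Target/LLVM/ROCDL/Utils.h"
#include "llvm/Support/raw_ostream.h"

using namespace mlir;

static void dumpKernelInfo(gpu::GPUModuleOp gpuModule, ArrayRef<char> elfData) {
  // Build a #gpu.kernel_table for the module; when `elfData` is a valid HSA
  // code object, the amdhsa.kernels ELF metadata is folded into each entry.
  gpu::KernelTableAttr table = ROCDL::getKernelMetadata(gpuModule, elfData);
  if (!table)
    return;
  for (gpu::KernelMetadataAttr kernel : table) {
    // `getAttr` returns a null attribute when the key is absent.
    if (auto sgprs = kernel.getAttr<IntegerAttr>("sgpr_count"))
      llvm::outs() << kernel.getName().getValue()
                   << ": sgpr_count = " << sgprs.getInt() << "\n";
  }
  // Kernels can also be looked up by name; a null result means "not found".
  if (gpu::KernelMetadataAttr k = table.lookup("kernel0"))
    llvm::outs() << "kernel0 has type " << k.getFunctionType() << "\n";
}
```
Because the table is kept sorted by kernel name, `lookup` can rely on
`impl::findAttrSorted` for the search.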
The motivation behind these attributes is to provide useful information
for things like tuning.
---------
Co-authored-by: Mehdi Amini <joker.eph at gmail.com>
Added:
mlir/lib/Target/LLVM/ROCDL/Utils.cpp
Modified:
mlir/include/mlir-c/Dialect/GPU.h
mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td
mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
mlir/lib/Bindings/Python/DialectGPU.cpp
mlir/lib/CAPI/Dialect/GPU.cpp
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/lib/Target/LLVM/CMakeLists.txt
mlir/lib/Target/LLVM/NVVM/Target.cpp
mlir/lib/Target/LLVM/ROCDL/Target.cpp
mlir/lib/Target/SPIRV/Target.cpp
mlir/test/Dialect/GPU/invalid.mlir
mlir/test/Dialect/GPU/ops.mlir
mlir/test/python/dialects/gpu/dialect.py
mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir-c/Dialect/GPU.h b/mlir/include/mlir-c/Dialect/GPU.h
index c42ff61f9592c7..321c1122c33707 100644
--- a/mlir/include/mlir-c/Dialect/GPU.h
+++ b/mlir/include/mlir-c/Dialect/GPU.h
@@ -37,6 +37,11 @@ MLIR_CAPI_EXPORTED MlirAttribute
mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target, uint32_t format,
MlirStringRef objectStrRef, MlirAttribute mlirObjectProps);
+MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetWithKernels(
+ MlirContext mlirCtx, MlirAttribute target, uint32_t format,
+ MlirStringRef objectStrRef, MlirAttribute mlirObjectProps,
+ MlirAttribute mlirKernelsAttr);
+
MLIR_CAPI_EXPORTED MlirAttribute
mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr);
@@ -52,6 +57,12 @@ mlirGPUObjectAttrHasProperties(MlirAttribute mlirObjectAttr);
MLIR_CAPI_EXPORTED MlirAttribute
mlirGPUObjectAttrGetProperties(MlirAttribute mlirObjectAttr);
+MLIR_CAPI_EXPORTED bool
+mlirGPUObjectAttrHasKernels(MlirAttribute mlirObjectAttr);
+
+MLIR_CAPI_EXPORTED MlirAttribute
+mlirGPUObjectAttrGetKernels(MlirAttribute mlirObjectAttr);
+
#ifdef __cplusplus
}
#endif
diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td
index 6659f4a2c58e82..07879a0dab07f4 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td
@@ -16,6 +16,155 @@
include "mlir/Dialect/GPU/IR/GPUBase.td"
include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
+//===----------------------------------------------------------------------===//
+// GPU kernel metadata attribute
+//===----------------------------------------------------------------------===//
+
+def GPU_KernelMetadataAttr : GPU_Attr<"KernelMetadata", "kernel_metadata"> {
+ let description = [{
+ GPU attribute for storing metadata related to a compiled kernel. The
+    attribute contains the name and argument types of the kernel.
+
+ The attribute also contains optional parameters for storing the arguments
+ attributes as well as a dictionary for additional metadata, like occupancy
+ information or other function attributes.
+
+ Note: The `arg_attrs` parameter is expected to follow all the constraints
+ imposed by the `mlir::FunctionOpInterface` interface.
+
+ Examples:
+ ```mlir
+ #gpu.kernel_metadata<@kernel1, (i32) -> (), arg_attrs = [...], metadata = {reg_count = 255, ...}>
+ #gpu.kernel_metadata<@kernel2, (i32, f64) -> ()>
+ ```
+ }];
+ let parameters = (ins
+ "StringAttr":$name,
+ "Type":$function_type,
+ OptionalParameter<"ArrayAttr", "arguments attributes">:$arg_attrs,
+ OptionalParameter<"DictionaryAttr", "metadata dictionary">:$metadata
+ );
+ let assemblyFormat = [{
+ `<` $name `,` $function_type (`,` struct($arg_attrs, $metadata)^)? `>`
+ }];
+ let builders = [
+ AttrBuilderWithInferredContext<(ins "StringAttr":$name,
+ "Type":$functionType,
+ CArg<"ArrayAttr", "nullptr">:$argAttrs,
+ CArg<"DictionaryAttr",
+ "nullptr">:$metadata), [{
+ assert(name && "invalid name");
+ return $_get(name.getContext(), name, functionType, argAttrs, metadata);
+ }]>,
+ AttrBuilderWithInferredContext<(ins "FunctionOpInterface":$kernel,
+ CArg<"DictionaryAttr",
+ "nullptr">:$metadata)>
+ ];
+ let genVerifyDecl = 1;
+ let extraClassDeclaration = [{
+ /// Compare two kernels based on the name.
+ bool operator<(const KernelMetadataAttr& other) const {
+ return getName().getValue() < other.getName().getValue();
+ }
+
+ /// Returns the metadata attribute corresponding to `key` or `nullptr`
+ /// if missing.
+ Attribute getAttr(StringRef key) const {
+ DictionaryAttr attrs = getMetadata();
+ return attrs ? attrs.get(key) : nullptr;
+ }
+ template <typename ConcreteAttr>
+ ConcreteAttr getAttr(StringRef key) const {
+ return llvm::dyn_cast_or_null<ConcreteAttr>(getAttr(key));
+ }
+ Attribute getAttr(StringAttr key) const {
+ DictionaryAttr attrs = getMetadata();
+ return attrs ? attrs.get(key) : nullptr;
+ }
+ template <typename ConcreteAttr>
+ ConcreteAttr getAttr(StringAttr key) const {
+ return llvm::dyn_cast_or_null<ConcreteAttr>(getAttr(key));
+ }
+
+ /// Returns the attribute dictionary at position `index`.
+ DictionaryAttr getArgAttrDict(unsigned index) {
+ ArrayAttr argArray = getArgAttrs();
+ return argArray ? llvm::cast<DictionaryAttr>(argArray[index]) : nullptr;
+ }
+
+ /// Return the specified attribute, if present, for the argument at 'index',
+ /// null otherwise.
+ Attribute getArgAttr(unsigned index, StringAttr name) {
+ DictionaryAttr argDict = getArgAttrDict(index);
+ return argDict ? argDict.get(name) : nullptr;
+ }
+ Attribute getArgAttr(unsigned index, StringRef name) {
+ DictionaryAttr argDict = getArgAttrDict(index);
+ return argDict ? argDict.get(name) : nullptr;
+ }
+
+ /// Returns a new KernelMetadataAttr that contains `attrs` in the metadata dictionary.
+ KernelMetadataAttr appendMetadata(ArrayRef<NamedAttribute> attrs) const;
+ }];
+}
+
+//===----------------------------------------------------------------------===//
+// GPU kernel table attribute
+//===----------------------------------------------------------------------===//
+
+def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> {
+ let description = [{
+ GPU attribute representing a list of `#gpu.kernel_metadata` attributes. This
+ attribute supports searching kernels by name. All kernels in the table must
+    have a unique name.
+
+ Examples:
+ ```mlir
+ // Empty table.
+ #gpu.kernel_table<>
+
+ // Table with a single kernel.
+ #gpu.kernel_table<[#gpu.kernel_metadata<kernel0, () -> () >]>
+
+ // Table with multiple kernels.
+ #gpu.kernel_table<[
+ #gpu.kernel_metadata<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>,
+ #gpu.kernel_metadata<"kernel1", (i32) -> ()>
+ ]>
+ ```
+ }];
+ let parameters = (ins
+ OptionalArrayRefParameter<"KernelMetadataAttr", "array of kernels">:$kernel_table
+ );
+ let assemblyFormat = [{
+ `<` (`[` qualified($kernel_table)^ `]`)? `>`
+ }];
+ let builders = [
+ AttrBuilder<(ins "ArrayRef<KernelMetadataAttr>":$kernels,
+ CArg<"bool", "false">:$isSorted)>
+ ];
+ let skipDefaultBuilders = 1;
+ let genVerifyDecl = 1;
+ let extraClassDeclaration = [{
+ llvm::ArrayRef<KernelMetadataAttr>::iterator begin() const {
+ return getKernelTable().begin();
+ }
+ llvm::ArrayRef<KernelMetadataAttr>::iterator end() const {
+ return getKernelTable().end();
+ }
+ size_t size() const {
+ return getKernelTable().size();
+ }
+ bool empty() const {
+ return getKernelTable().empty();
+ }
+
+ /// Returns the kernel with name `key` or `nullptr` if not present.
+ KernelMetadataAttr lookup(StringRef key) const;
+ KernelMetadataAttr lookup(StringAttr key) const;
+ }];
+}
+
//===----------------------------------------------------------------------===//
// GPU object attribute.
//===----------------------------------------------------------------------===//
@@ -36,8 +185,9 @@ def GPU_CompilationTargetEnum : GPU_I32Enum<
def GPU_ObjectAttr : GPU_Attr<"Object", "object"> {
let description = [{
A GPU object attribute glues together a GPU target, the object kind, a
- binary string with the object, and the object properties, encapsulating how
- the object was generated and its properties with the object itself.
+ binary string with the object, the object properties, and kernel metadata,
+ encapsulating how the object was generated and its properties with the
+ object itself.
There are four object formats:
1. `Offload`: represents generic objects not described by the other three
@@ -55,6 +205,10 @@ def GPU_ObjectAttr : GPU_Attr<"Object", "object"> {
Object properties are specified through the `properties` dictionary
attribute and can be used to define additional information.
+
+ Kernel metadata is specified through the `kernels` parameter, and can be
+ used to specify additional information on a kernel by kernel basis.
+
The target attribute must implement or promise the `TargetAttrInterface`
interface.
@@ -63,16 +217,29 @@ def GPU_ObjectAttr : GPU_Attr<"Object", "object"> {
#gpu.object<#nvvm.target, properties = {O = 3 : i32}, assembly = "..."> // An assembly object with additional properties.
#gpu.object<#rocdl.target, bin = "..."> // A binary object.
#gpu.object<#nvvm.target, "..."> // A fatbin object.
+ #gpu.object<#nvvm.target, kernels = #gpu.kernel_table<...>, "..."> // An object with a kernel table.
```
}];
let parameters = (ins
"Attribute":$target,
DefaultValuedParameter<"CompilationTarget", "CompilationTarget::Fatbin">:$format,
"StringAttr":$object,
- OptionalParameter<"DictionaryAttr">:$properties
+ OptionalParameter<"DictionaryAttr">:$properties,
+ OptionalParameter<"KernelTableAttr">:$kernels
);
+ let builders = [
+ AttrBuilderWithInferredContext<(ins "Attribute":$target,
+ "CompilationTarget":$format,
+ "StringAttr":$object,
+ CArg<"DictionaryAttr", "nullptr">:$properties,
+ CArg<"KernelTableAttr", "nullptr">:$kernels), [{
+ assert(target && "invalid target");
+ return $_get(target.getContext(), target, format, object, properties, kernels);
+ }]>
+ ];
let assemblyFormat = [{ `<`
- $target `,` (`properties` `=` $properties ^ `,`)?
+ $target `,` (`properties` `=` $properties^ `,`)?
+ (`kernels` `=` $kernels^ `,`)?
custom<Object>($format, $object)
`>`
}];
diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
index 3c637a01b0e3be..3d2174c144815b 100644
--- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
@@ -14,6 +14,7 @@
#define MLIR_TARGET_LLVM_ROCDL_UTILS_H
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVM/ModuleToObject.h"
@@ -107,6 +108,20 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
/// AMD GCN libraries to use when linking, the default is using none.
AMDGCNLibraries deviceLibs = AMDGCNLibraries::None;
};
+
+/// Returns a map containing the `amdhsa.kernels` ELF metadata for each of the
+/// kernels in the binary, or `std::nullopt` if the metadata couldn't be
+/// retrieved. The map associates the name of the kernel with the list of named
+/// attributes found in `amdhsa.kernels`. For more information on the ELF
+/// metadata see: https://llvm.org/docs/AMDGPUUsage.html#amdhsa
+std::optional<DenseMap<StringAttr, NamedAttrList>>
+getAMDHSAKernelsELFMetadata(Builder &builder, ArrayRef<char> elfData);
+
+/// Returns a `#gpu.kernel_table` containing kernel metadata for each of the
+/// kernels in `gpuModule`. If `elfData` is valid, then the `amdhsa.kernels` ELF
+/// metadata will be added to the `#gpu.kernel_table`.
+gpu::KernelTableAttr getKernelMetadata(Operation *gpuModule,
+ ArrayRef<char> elfData = {});
} // namespace ROCDL
} // namespace mlir
diff --git a/mlir/lib/Bindings/Python/DialectGPU.cpp b/mlir/lib/Bindings/Python/DialectGPU.cpp
index a9e339b50dabc6..560a54bcd15919 100644
--- a/mlir/lib/Bindings/Python/DialectGPU.cpp
+++ b/mlir/lib/Bindings/Python/DialectGPU.cpp
@@ -48,17 +48,21 @@ PYBIND11_MODULE(_mlirDialectsGPU, m) {
.def_classmethod(
"get",
[](py::object cls, MlirAttribute target, uint32_t format,
- py::bytes object, std::optional<MlirAttribute> mlirObjectProps) {
+ py::bytes object, std::optional<MlirAttribute> mlirObjectProps,
+ std::optional<MlirAttribute> mlirKernelsAttr) {
py::buffer_info info(py::buffer(object).request());
MlirStringRef objectStrRef =
mlirStringRefCreate(static_cast<char *>(info.ptr), info.size);
- return cls(mlirGPUObjectAttrGet(
+ return cls(mlirGPUObjectAttrGetWithKernels(
mlirAttributeGetContext(target), target, format, objectStrRef,
mlirObjectProps.has_value() ? *mlirObjectProps
+ : MlirAttribute{nullptr},
+ mlirKernelsAttr.has_value() ? *mlirKernelsAttr
: MlirAttribute{nullptr}));
},
"cls"_a, "target"_a, "format"_a, "object"_a,
- "properties"_a = py::none(), "Gets a gpu.object from parameters.")
+ "properties"_a = py::none(), "kernels"_a = py::none(),
+ "Gets a gpu.object from parameters.")
.def_property_readonly(
"target",
[](MlirAttribute self) { return mlirGPUObjectAttrGetTarget(self); })
@@ -71,9 +75,16 @@ PYBIND11_MODULE(_mlirDialectsGPU, m) {
MlirStringRef stringRef = mlirGPUObjectAttrGetObject(self);
return py::bytes(stringRef.data, stringRef.length);
})
- .def_property_readonly("properties", [](MlirAttribute self) {
- if (mlirGPUObjectAttrHasProperties(self))
- return py::cast(mlirGPUObjectAttrGetProperties(self));
+ .def_property_readonly("properties",
+ [](MlirAttribute self) {
+ if (mlirGPUObjectAttrHasProperties(self))
+ return py::cast(
+ mlirGPUObjectAttrGetProperties(self));
+ return py::none().cast<py::object>();
+ })
+ .def_property_readonly("kernels", [](MlirAttribute self) {
+ if (mlirGPUObjectAttrHasKernels(self))
+ return py::cast(mlirGPUObjectAttrGetKernels(self));
return py::none().cast<py::object>();
});
}
diff --git a/mlir/lib/CAPI/Dialect/GPU.cpp b/mlir/lib/CAPI/Dialect/GPU.cpp
index 0acebb23004291..e4796ed1499ea1 100644
--- a/mlir/lib/CAPI/Dialect/GPU.cpp
+++ b/mlir/lib/CAPI/Dialect/GPU.cpp
@@ -43,9 +43,28 @@ MlirAttribute mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target,
DictionaryAttr objectProps;
if (mlirObjectProps.ptr != nullptr)
objectProps = llvm::cast<DictionaryAttr>(unwrap(mlirObjectProps));
- return wrap(gpu::ObjectAttr::get(ctx, unwrap(target),
- static_cast<gpu::CompilationTarget>(format),
- StringAttr::get(ctx, object), objectProps));
+ return wrap(gpu::ObjectAttr::get(
+ ctx, unwrap(target), static_cast<gpu::CompilationTarget>(format),
+ StringAttr::get(ctx, object), objectProps, nullptr));
+}
+
+MlirAttribute mlirGPUObjectAttrGetWithKernels(MlirContext mlirCtx,
+ MlirAttribute target,
+ uint32_t format,
+ MlirStringRef objectStrRef,
+ MlirAttribute mlirObjectProps,
+ MlirAttribute mlirKernelsAttr) {
+ MLIRContext *ctx = unwrap(mlirCtx);
+ llvm::StringRef object = unwrap(objectStrRef);
+ DictionaryAttr objectProps;
+ if (mlirObjectProps.ptr != nullptr)
+ objectProps = llvm::cast<DictionaryAttr>(unwrap(mlirObjectProps));
+ gpu::KernelTableAttr kernels;
+ if (mlirKernelsAttr.ptr != nullptr)
+ kernels = llvm::cast<gpu::KernelTableAttr>(unwrap(mlirKernelsAttr));
+ return wrap(gpu::ObjectAttr::get(
+ ctx, unwrap(target), static_cast<gpu::CompilationTarget>(format),
+ StringAttr::get(ctx, object), objectProps, kernels));
}
MlirAttribute mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr) {
@@ -78,3 +97,15 @@ MlirAttribute mlirGPUObjectAttrGetProperties(MlirAttribute mlirObjectAttr) {
llvm::cast<gpu::ObjectAttr>(unwrap(mlirObjectAttr));
return wrap(objectAttr.getProperties());
}
+
+bool mlirGPUObjectAttrHasKernels(MlirAttribute mlirObjectAttr) {
+ gpu::ObjectAttr objectAttr =
+ llvm::cast<gpu::ObjectAttr>(unwrap(mlirObjectAttr));
+ return objectAttr.getKernels() != nullptr;
+}
+
+MlirAttribute mlirGPUObjectAttrGetKernels(MlirAttribute mlirObjectAttr) {
+ gpu::ObjectAttr objectAttr =
+ llvm::cast<gpu::ObjectAttr>(unwrap(mlirObjectAttr));
+ return wrap(objectAttr.getKernels());
+}
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index a59952228ef6ea..e45ba7838b453c 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -2091,7 +2091,8 @@ void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results,
LogicalResult ObjectAttr::verify(function_ref<InFlightDiagnostic()> emitError,
Attribute target, CompilationTarget format,
- StringAttr object, DictionaryAttr properties) {
+ StringAttr object, DictionaryAttr properties,
+ KernelTableAttr kernels) {
if (!target)
return emitError() << "the target attribute cannot be null";
if (target.hasPromiseOrImplementsInterface<TargetAttrInterface>())
@@ -2177,6 +2178,113 @@ LogicalResult gpu::DynamicSharedMemoryOp::verify() {
return success();
}
+//===----------------------------------------------------------------------===//
+// GPU KernelMetadataAttr
+//===----------------------------------------------------------------------===//
+
+KernelMetadataAttr KernelMetadataAttr::get(FunctionOpInterface kernel,
+ DictionaryAttr metadata) {
+ assert(kernel && "invalid kernel");
+ return get(kernel.getNameAttr(), kernel.getFunctionType(),
+ kernel.getAllArgAttrs(), metadata);
+}
+
+KernelMetadataAttr
+KernelMetadataAttr::getChecked(function_ref<InFlightDiagnostic()> emitError,
+ FunctionOpInterface kernel,
+ DictionaryAttr metadata) {
+ assert(kernel && "invalid kernel");
+ return getChecked(emitError, kernel.getNameAttr(), kernel.getFunctionType(),
+ kernel.getAllArgAttrs(), metadata);
+}
+
+KernelMetadataAttr
+KernelMetadataAttr::appendMetadata(ArrayRef<NamedAttribute> attrs) const {
+ if (attrs.empty())
+ return *this;
+ NamedAttrList attrList;
+ if (DictionaryAttr dict = getMetadata())
+ attrList.append(dict);
+ attrList.append(attrs);
+ return KernelMetadataAttr::get(getName(), getFunctionType(), getArgAttrs(),
+ attrList.getDictionary(getContext()));
+}
+
+LogicalResult
+KernelMetadataAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+ StringAttr name, Type functionType,
+ ArrayAttr argAttrs, DictionaryAttr metadata) {
+ if (name.empty())
+ return emitError() << "the kernel name can't be empty";
+ if (argAttrs) {
+ if (llvm::any_of(argAttrs, [](Attribute attr) {
+ return !llvm::isa<DictionaryAttr>(attr);
+ }))
+ return emitError()
+ << "all attributes in the array must be a dictionary attribute";
+ }
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
+// GPU KernelTableAttr
+//===----------------------------------------------------------------------===//
+
+KernelTableAttr KernelTableAttr::get(MLIRContext *context,
+ ArrayRef<KernelMetadataAttr> kernels,
+ bool isSorted) {
+ // Note that `is_sorted` is always only invoked once even with assertions ON.
+ assert((!isSorted || llvm::is_sorted(kernels)) &&
+ "expected a sorted kernel array");
+ // Immediately return the attribute if the array is sorted.
+ if (isSorted || llvm::is_sorted(kernels))
+ return Base::get(context, kernels);
+ // Sort the array.
+ SmallVector<KernelMetadataAttr> kernelsTmp(kernels);
+ llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end());
+ return Base::get(context, kernelsTmp);
+}
+
+KernelTableAttr KernelTableAttr::getChecked(
+ function_ref<InFlightDiagnostic()> emitError, MLIRContext *context,
+ ArrayRef<KernelMetadataAttr> kernels, bool isSorted) {
+ // Note that `is_sorted` is always only invoked once even with assertions ON.
+ assert((!isSorted || llvm::is_sorted(kernels)) &&
+ "expected a sorted kernel array");
+ // Immediately return the attribute if the array is sorted.
+ if (isSorted || llvm::is_sorted(kernels))
+ return Base::getChecked(emitError, context, kernels);
+ // Sort the array.
+ SmallVector<KernelMetadataAttr> kernelsTmp(kernels);
+ llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end());
+ return Base::getChecked(emitError, context, kernelsTmp);
+}
+
+LogicalResult
+KernelTableAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+ ArrayRef<KernelMetadataAttr> kernels) {
+ if (kernels.size() < 2)
+ return success();
+ // Check that the kernels are uniquely named.
+ if (std::adjacent_find(kernels.begin(), kernels.end(),
+ [](KernelMetadataAttr l, KernelMetadataAttr r) {
+ return l.getName() == r.getName();
+ }) != kernels.end()) {
+ return emitError() << "expected all kernels to be uniquely named";
+ }
+ return success();
+}
+
+KernelMetadataAttr KernelTableAttr::lookup(StringRef key) const {
+ auto [iterator, found] = impl::findAttrSorted(begin(), end(), key);
+ return found ? *iterator : KernelMetadataAttr();
+}
+
+KernelMetadataAttr KernelTableAttr::lookup(StringAttr key) const {
+ auto [iterator, found] = impl::findAttrSorted(begin(), end(), key);
+ return found ? *iterator : KernelMetadataAttr();
+}
+
//===----------------------------------------------------------------------===//
// GPU target options
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 93dc5ff9d35b74..bc14c568e46be2 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -110,10 +110,12 @@ endif()
add_mlir_dialect_library(MLIRROCDLTarget
ROCDL/Target.cpp
+ ROCDL/Utils.cpp
OBJECT
LINK_COMPONENTS
+ FrontendOffloading
MCParser
${AMDGPU_LIBS}
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index a75b7f92ed8dc3..806c405ac17dfa 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -604,5 +604,5 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
return builder.getAttr<gpu::ObjectAttr>(
attribute, format,
builder.getStringAttr(StringRef(object.data(), object.size())),
- objectProps);
+ objectProps, /*kernels=*/nullptr);
}
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index e32a0c7e14e85c..d8a79a7e80d643 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -506,13 +506,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module,
gpu::CompilationTarget format = options.getCompilationTarget();
// If format is `fatbin` transform it to binary as `fatbin` is not yet
// supported.
- if (format > gpu::CompilationTarget::Binary)
+ gpu::KernelTableAttr kernels;
+ if (format > gpu::CompilationTarget::Binary) {
format = gpu::CompilationTarget::Binary;
-
+ kernels = ROCDL::getKernelMetadata(module, object);
+ }
DictionaryAttr properties{};
Builder builder(attribute.getContext());
- return builder.getAttr<gpu::ObjectAttr>(
- attribute, format,
- builder.getStringAttr(StringRef(object.data(), object.size())),
- properties);
+ StringAttr objectStr =
+ builder.getStringAttr(StringRef(object.data(), object.size()));
+ return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
+ properties, kernels);
}
diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp
new file mode 100644
index 00000000000000..04b1b22279e5db
--- /dev/null
+++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp
@@ -0,0 +1,87 @@
+//===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ROCDL target-related utility classes and functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/LLVM/ROCDL/Utils.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Frontend/Offloading/Utility.h"
+
+using namespace mlir;
+using namespace mlir::ROCDL;
+
+std::optional<DenseMap<StringAttr, NamedAttrList>>
+mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder,
+ ArrayRef<char> elfData) {
+ uint16_t elfABIVersion;
+ llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> kernels;
+ llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()),
+ "buffer");
+ // Get the metadata.
+ llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+ buffer, kernels, elfABIVersion);
+ // Return `nullopt` if the metadata couldn't be retrieved.
+ if (error) {
+ llvm::consumeError(std::move(error));
+ return std::nullopt;
+ }
+ // Helper lambda for converting values.
+ auto getI32Array = [&builder](const uint32_t *array) {
+ return builder.getDenseI32ArrayAttr({static_cast<int32_t>(array[0]),
+ static_cast<int32_t>(array[1]),
+ static_cast<int32_t>(array[2])});
+ };
+ DenseMap<StringAttr, NamedAttrList> kernelMD;
+ for (const auto &[name, kernel] : kernels) {
+ NamedAttrList attrs;
+ // Add kernel metadata.
+ attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount));
+ attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount));
+ attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount));
+ attrs.append("sgpr_spill_count",
+ builder.getI64IntegerAttr(kernel.SGPRSpillCount));
+ attrs.append("vgpr_spill_count",
+ builder.getI64IntegerAttr(kernel.VGPRSpillCount));
+ attrs.append("wavefront_size",
+ builder.getI64IntegerAttr(kernel.WavefrontSize));
+ attrs.append("max_flat_workgroup_size",
+ builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize));
+ attrs.append("group_segment_fixed_size",
+ builder.getI64IntegerAttr(kernel.GroupSegmentList));
+ attrs.append("private_segment_fixed_size",
+ builder.getI64IntegerAttr(kernel.PrivateSegmentSize));
+ attrs.append("reqd_workgroup_size",
+ getI32Array(kernel.RequestedWorkgroupSize));
+ attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint));
+ kernelMD[builder.getStringAttr(name)] = std::move(attrs);
+ }
+ return std::move(kernelMD);
+}
+
+gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule,
+ ArrayRef<char> elfData) {
+ auto module = cast<gpu::GPUModuleOp>(gpuModule);
+ Builder builder(module.getContext());
+ SmallVector<gpu::KernelMetadataAttr> kernels;
+ std::optional<DenseMap<StringAttr, NamedAttrList>> mdMapOrNull =
+ getAMDHSAKernelsELFMetadata(builder, elfData);
+ for (auto funcOp : module.getBody()->getOps<LLVM::LLVMFuncOp>()) {
+ if (!funcOp->getDiscardableAttr("rocdl.kernel"))
+ continue;
+ kernels.push_back(gpu::KernelMetadataAttr::get(
+ funcOp, mdMapOrNull ? builder.getDictionaryAttr(
+ mdMapOrNull->lookup(funcOp.getNameAttr()))
+ : nullptr));
+ }
+ return gpu::KernelTableAttr::get(gpuModule->getContext(), kernels);
+}
diff --git a/mlir/lib/Target/SPIRV/Target.cpp b/mlir/lib/Target/SPIRV/Target.cpp
index d48548bf9709c0..dd128e254aa0d6 100644
--- a/mlir/lib/Target/SPIRV/Target.cpp
+++ b/mlir/lib/Target/SPIRV/Target.cpp
@@ -98,5 +98,5 @@ SPIRVTargetAttrImpl::createObject(Attribute attribute, Operation *module,
return builder.getAttr<gpu::ObjectAttr>(
attribute, format,
builder.getStringAttr(StringRef(object.data(), object.size())),
- objectProps);
+ objectProps, /*kernels=*/nullptr);
}
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 20c1c4cf8a2d0b..fd7618020b5d89 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -848,3 +848,15 @@ module attributes {gpu.container_module} {
gpu.module @kernel <> {
}
}
+
+// -----
+
+gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">,
+ // expected-error at +1{{expected all kernels to be uniquely named}}
+ kernels = #gpu.kernel_table<[
+ #gpu.kernel_metadata<"kernel", (i32) -> ()>,
+ #gpu.kernel_metadata<"kernel", (i32, f32) -> (), metadata = {sgpr_count = 255}>
+ // expected-error at below{{failed to parse GPU_ObjectAttr parameter 'kernels' which is to be a `KernelTableAttr`}}
+ ]>,
+ bin = "BLOB">
+ ]
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index ba7897f4e80cb5..b9c0a0e79e8f2a 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -441,3 +441,26 @@ gpu.module @module_with_two_target [#nvvm.target, #rocdl.target<chip = "gfx90a">
gpu.module @module_with_offload_handler <#gpu.select_object<0>> [#nvvm.target] {
}
+
+// Test kernel attributes
+gpu.binary @kernel_attrs_1 [
+ #gpu.object<#rocdl.target<chip = "gfx900">,
+ kernels = #gpu.kernel_table<[
+ #gpu.kernel_metadata<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>,
+ #gpu.kernel_metadata<"kernel1", (i32) -> (), arg_attrs = [{llvm.read_only}]>
+ ]>,
+ bin = "BLOB">
+ ]
+
+// Verify the kernels are sorted
+// CHECK-LABEL: gpu.binary @kernel_attrs_2
+gpu.binary @kernel_attrs_2 [
+ // CHECK: [#gpu.kernel_metadata<"a_kernel", () -> ()>, #gpu.kernel_metadata<"m_kernel", () -> ()>, #gpu.kernel_metadata<"z_kernel", () -> ()>]
+ #gpu.object<#rocdl.target<chip = "gfx900">,
+ kernels = #gpu.kernel_table<[
+ #gpu.kernel_metadata<"z_kernel", () -> ()>,
+ #gpu.kernel_metadata<"m_kernel", () -> ()>,
+ #gpu.kernel_metadata<"a_kernel", () -> ()>
+ ]>,
+ bin = "BLOB">
+ ]
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index aded35b04aa1ea..26ee9f34cb3326 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -55,3 +55,12 @@ def testObjectAttr():
# CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
print(o)
assert o.object == object
+
+ object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
+ kernelTable = Attribute.parse(
+ '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>'
+ )
+ o = gpu.ObjectAttr.get(target, format, object, kernels=kernelTable)
+ # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
+ print(o)
+ assert o.kernels == kernelTable
diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
index 33291bc4bcaed9..43fa3d850d9e29 100644
--- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
@@ -158,3 +158,69 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToBinary)) {
ASSERT_FALSE(object->empty());
}
}
+
+// Test ROCDL metadata.
+TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) {
+ if (!hasROCMTools())
+ GTEST_SKIP() << "ROCm installation not found, skipping test.";
+
+ MLIRContext context(registry);
+
+ // MLIR module used for the tests.
+ const std::string moduleStr = R"mlir(
+ gpu.module @rocdl_test {
+ llvm.func @rocdl_kernel_1(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+ llvm.return
+ }
+ llvm.func @rocdl_kernel_0(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+ llvm.return
+ }
+ llvm.func @rocdl_kernel_2(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+ llvm.return
+ }
+ llvm.func @a_kernel(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+ llvm.return
+ }
+ })mlir";
+
+ OwningOpRef<ModuleOp> module =
+ parseSourceString<ModuleOp>(moduleStr, &context);
+ ASSERT_TRUE(!!module);
+
+ // Create a ROCDL target.
+ ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context);
+
+ // Serialize the module.
+ auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+ ASSERT_TRUE(!!serializer);
+ gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Binary);
+ for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+ std::optional<SmallVector<char, 0>> object =
+ serializer.serializeToObject(gpuModule, options);
+ // Check that the serializer was successful.
+ ASSERT_TRUE(object != std::nullopt);
+ ASSERT_FALSE(object->empty());
+ if (!object)
+ continue;
+ // Get the metadata.
+ gpu::KernelTableAttr metadata =
+ ROCDL::getKernelMetadata(gpuModule, *object);
+ ASSERT_TRUE(metadata != nullptr);
+ // There should be 4 kernels.
+ ASSERT_TRUE(metadata.size() == 4);
+    // Check that the lookup method finds the kernel.
+ ASSERT_TRUE(metadata.lookup("a_kernel") != nullptr);
+ ASSERT_TRUE(metadata.lookup("rocdl_kernel_0") != nullptr);
+ // Check that the kernel doesn't exist.
+ ASSERT_TRUE(metadata.lookup("not_existent_kernel") == nullptr);
+ // Test the `KernelMetadataAttr` iterators.
+ for (gpu::KernelMetadataAttr kernel : metadata) {
+ // Check that the ELF metadata is present.
+ ASSERT_TRUE(kernel.getMetadata() != nullptr);
+ // Verify that `sgpr_count` is present and it is an integer attribute.
+ ASSERT_TRUE(kernel.getAttr<IntegerAttr>("sgpr_count") != nullptr);
+ // Verify that `vgpr_count` is present and it is an integer attribute.
+ ASSERT_TRUE(kernel.getAttr<IntegerAttr>("vgpr_count") != nullptr);
+ }
+ }
+}
diff --git a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
index 37dbfe62036871..aaa281e07933b5 100644
--- a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
@@ -116,7 +116,7 @@ TargetAttrImpl::createObject(Attribute attribute, Operation *module,
module->getContext(), attribute, gpu::CompilationTarget::Offload,
StringAttr::get(module->getContext(),
StringRef(object.data(), object.size())),
- module->getAttrDictionary());
+ module->getAttrDictionary(), /*kernels=*/nullptr);
}
TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(TargetAttrAPI)) {