[Mlir-commits] [mlir] [MLIR][AMDGPU] Add rocdl.attr translation for waves_per_eu & unsafe_fp_atomics (PR #103732)
Manupa Karunaratne
llvmlistbot at llvm.org
Wed Aug 14 03:14:10 PDT 2024
https://github.com/manupak created https://github.com/llvm/llvm-project/pull/103732
This commit adds support for plumbing the amdgpu-unsafe-fp-atomics attr
via the introduction of `rocdl.unsafe_fp_atomics`.
It also adds the missing translation for the amdgpu-waves-per-eu attr.
>From 25239a7bf2690b2f1c35cc589ea32b62f99eea6c Mon Sep 17 00:00:00 2001
From: Manupa Karunaratne <manupa.karunaratne at amd.com>
Date: Tue, 13 Aug 2024 10:22:50 +0000
Subject: [PATCH] [MLIR][AMDGPU] Add rocdl.attr translation for waves_per_eu &
unsafe_fp_atomics
This commit adds support for plumbing the amdgpu-unsafe-fp-atomics attr
via the introduction of `rocdl.unsafe_fp_atomics`.
It also adds the missing translation for the amdgpu-waves-per-eu attr.
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 3 +-
.../ROCDL/ROCDLToLLVMIRTranslation.cpp | 32 +++++++++++++++++++
mlir/test/Target/LLVMIR/rocdl.mlir | 16 ++++++++++
3 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 868208ff74a521..b81b7e577b213a 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -57,7 +57,8 @@ def ROCDL_Dialect : Dialect {
"::mlir::DenseI32ArrayAttr":$reqd_work_group_size,
"::mlir::StringAttr":$flat_work_group_size,
"::mlir::IntegerAttr":$max_flat_work_group_size,
- "::mlir::IntegerAttr":$waves_per_eu
+ "::mlir::IntegerAttr":$waves_per_eu,
+ "::mlir::BoolAttr":$unsafe_fp_atomics
);
let useDefaultAttributePrinterParser = 1;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
index 2a146f5efed307..c1ee6507763566 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
@@ -128,6 +128,23 @@ class ROCDLDialectLLVMIRTranslationInterface
attrValueStream << "1," << value.getInt();
llvmFunc->addFnAttr("amdgpu-flat-work-group-size", llvmAttrValue);
}
+ if (dialect->getWavesPerEuAttrHelper().getName() == attribute.getName()) {
+ auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
+ if (!func)
+ return op->emitOpError(Twine(attribute.getName()) +
+ " is only supported on `llvm.func` operations");
+ auto value = dyn_cast<IntegerAttr>(attribute.getValue());
+ if (!value)
+ return op->emitOpError(Twine(attribute.getName()) +
+ " must be an integer");
+
+ llvm::Function *llvmFunc =
+ moduleTranslation.lookupFunction(func.getName());
+ llvm::SmallString<8> llvmAttrValue;
+ llvm::raw_svector_ostream attrValueStream(llvmAttrValue);
+ attrValueStream << value.getInt();
+ llvmFunc->addFnAttr("amdgpu-waves-per-eu", llvmAttrValue);
+ }
if (dialect->getFlatWorkGroupSizeAttrHelper().getName() ==
attribute.getName()) {
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
@@ -160,6 +177,21 @@ class ROCDLDialectLLVMIRTranslationInterface
llvmFunc->addFnAttr("uniform-work-group-size",
value.getValue() ? "true" : "false");
}
+ if (dialect->getUnsafeFpAtomicsAttrHelper().getName() ==
+ attribute.getName()) {
+ auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
+ if (!func)
+ return op->emitOpError(Twine(attribute.getName()) +
+ " is only supported on `llvm.func` operations");
+ auto value = dyn_cast<BoolAttr>(attribute.getValue());
+ if (!value)
+ return op->emitOpError(Twine(attribute.getName()) +
+ " must be a boolean");
+ llvm::Function *llvmFunc =
+ moduleTranslation.lookupFunction(func.getName());
+ llvmFunc->addFnAttr("amdgpu-unsafe-fp-atomics",
+ value.getValue() ? "true" : "false");
+ }
// Set reqd_work_group_size metadata
if (dialect->getReqdWorkGroupSizeAttrHelper().getName() ==
attribute.getName()) {
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 78c3987fab648e..64bcb5bdb255db 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -62,6 +62,20 @@ llvm.func @kernel_func_no_uniform_work_groups() attributes {rocdl.kernel, rocdl.
llvm.return
}
+llvm.func @kernel_func_waves_per_eu()
+ attributes {rocdl.kernel, rocdl.waves_per_eu = 2 : i32} {
+ // CHECK-LABEL: amdgpu_kernel void @kernel_func_waves_per_eu()
+ // CHECK: #[[$KERNEL_WAVES_PER_EU_ATTR:[0-9]+]]
+ llvm.return
+}
+
+llvm.func @kernel_func_unsafe_fp_atomics()
+ attributes {rocdl.kernel, rocdl.unsafe_fp_atomics = true} {
+ // CHECK-LABEL: amdgpu_kernel void @kernel_func_unsafe_fp_atomics()
+ // CHECK: #[[$KERNEL_UNSAFE_FP_ATOMICS_ATTR:[0-9]+]]
+ llvm.return
+}
+
llvm.func @rocdl.lane_id() -> i32 {
// CHECK: [[mbcntlo:%.+]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
// CHECK-NEXT: call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[mbcntlo]])
@@ -521,3 +535,5 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
// CHECK-DAG: attributes #[[$KERNEL_NO_UNIFORM_WORK_GROUPS_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="false" }
// CHECK-DAG: ![[$REQD_WORK_GROUP_SIZE]] = !{i32 16, i32 4, i32 2}
+// CHECK-DAG: attributes #[[$KERNEL_WAVES_PER_EU_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "uniform-work-group-size"="true" }
+// CHECK-DAG: attributes #[[$KERNEL_UNSAFE_FP_ATOMICS_ATTR]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-unsafe-fp-atomics"="true" "uniform-work-group-size"="true" }
More information about the Mlir-commits
mailing list