[Mlir-commits] [mlir] [ROCDL] Use attached target on the GPU module when lowering GPU ops to ROCDL (PR #110735)
Umang Yadav
llvmlistbot at llvm.org
Fri Oct 4 07:11:30 PDT 2024
https://github.com/umangyadav updated https://github.com/llvm/llvm-project/pull/110735
>From 9731b4c5282b1f607412151e0c4524ef4a2dd5bb Mon Sep 17 00:00:00 2001
From: Umang Yadav <umayadav at amd.com>
Date: Tue, 1 Oct 2024 20:08:46 +0000
Subject: [PATCH 1/5] Use attached target on the GPU module when lowering GPU
ops to ROCDL
---
.../Conversion/GPUToROCDL/GPUToROCDLPass.h | 2 +-
mlir/include/mlir/Conversion/Passes.td | 4 +--
.../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 29 +++++++++++++++----
.../GPUCommon/lower-memory-space-attrs.mlir | 2 +-
.../GPUCommon/memory-attrbution.mlir | 2 +-
.../GPUCommon/memref-arg-attrs.mlir | 2 +-
.../GPUCommon/memref-arg-noalias-attrs.mlir | 2 +-
.../GPUCommon/memref-arg-noalias-warning.mlir | 2 +-
.../GPUToROCDL/gpu-to-rocdl-hip.mlir | 2 +-
.../GPUToROCDL/gpu-to-rocdl-opencl.mlir | 2 +-
.../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 4 +--
mlir/test/Conversion/GPUToROCDL/memref.mlir | 4 +--
.../Integration/GPU/ROCM/gpu-to-hsaco.mlir | 2 +-
mlir/test/Integration/GPU/ROCM/printf.mlir | 2 +-
.../Integration/GPU/ROCM/two-modules.mlir | 2 +-
mlir/test/Integration/GPU/ROCM/vecadd.mlir | 2 +-
.../GPU/ROCM/vector-transferops.mlir | 2 +-
17 files changed, 42 insertions(+), 25 deletions(-)
diff --git a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
index 5647787712997b..f1233ad894daf5 100644
--- a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
+++ b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
@@ -42,7 +42,7 @@ void configureGpuToROCDLConversionLegality(ConversionTarget &target);
/// is configurable.
std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
createLowerGpuOpsToROCDLOpsPass(
- const std::string &chipset = "gfx900",
+ const std::string &chipset = "infer",
unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout,
bool useBarePtrCallConv = false,
gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown);
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 4d272ba219c6f1..d536ee25a75637 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -592,8 +592,8 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
];
let options = [
Option<"chipset", "chipset", "std::string",
- /*default=*/"\"gfx000\"",
- "Chipset that these operations will run on">,
+ /*default=*/"\"infer\"",
+ "Chipset that these operations will run on. By Default it will infer target from attached target attribute on GPU module on which it operates">,
Option<"indexBitwidth", "index-bitwidth", "unsigned",
/*default=kDeriveIndexBitwidthFromDataLayout*/"0",
"Bitwidth of the index type, 0 to use size of machine word">,
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 482c9e2c2d0017..83607d2c46a0f4 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -219,6 +219,29 @@ struct LowerGpuOpsToROCDLOpsPass
gpu::GPUModuleOp m = getOperation();
MLIRContext *ctx = m.getContext();
+ ArrayAttr targets = m.getTargetsAttr();
+ if (chipset == "infer") {
+ if (!targets) {
+ emitError(UnknownLoc::get(ctx),
+ "ROCDLTargetAttr is empty on GPU module");
+ return signalPassFailure();
+ }
+ if (targets.size() != 1) {
+ emitError(UnknownLoc::get(ctx), "ROCDLTargetAttrs has more specified "
+ "more than one gpu-arch on GPU module");
+ return signalPassFailure();
+ }
+ const ROCDL::ROCDLTargetAttr targetAttr =
+ mlir::dyn_cast<ROCDL::ROCDLTargetAttr>(targets.getValue().front());
+ chipset = targetAttr.getChip().str();
+ }
+
+ FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
+ if (failed(maybeChipset)) {
+ emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
+ return signalPassFailure();
+ }
+
auto llvmDataLayout = m->getAttrOfType<StringAttr>(
LLVM::LLVMDialect::getDataLayoutAttrName());
if (!llvmDataLayout) {
@@ -231,12 +254,6 @@ struct LowerGpuOpsToROCDLOpsPass
UnitAttr::get(ctx));
}
- FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
- if (failed(maybeChipset)) {
- emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
- return signalPassFailure();
- }
-
/// Customize the bitwidth used for the device side index computations.
LowerToLLVMOptions options(
ctx, DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
diff --git a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
index 771f3185904bb8..a338d35525eba7 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=CHECK,ROCDL
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='chipset=gfx900' | FileCheck %s --check-prefixes=CHECK,ROCDL
// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=CHECK,NVVM
gpu.module @kernel {
diff --git a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
index 4fc19b8e93646c..b1291e07c060b5 100644
--- a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
+++ b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
@@ -1,5 +1,5 @@
// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-nvvm --split-input-file %s | FileCheck --check-prefix=NVVM %s
-// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-rocdl --split-input-file %s | FileCheck --check-prefix=ROCDL %s
+// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-rocdl='chipset=gfx900' --split-input-file %s | FileCheck --check-prefix=ROCDL %s
gpu.module @kernel {
// NVVM-LABEL: llvm.func @private
diff --git a/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir b/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir
index e7c742067b4eb5..3c3082c473896e 100644
--- a/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir
+++ b/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='use-bare-ptr-memref-call-conv=0' | FileCheck %s --check-prefixes=CHECK,ROCDL
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='chipset=gfx900 use-bare-ptr-memref-call-conv=0' | FileCheck %s --check-prefixes=CHECK,ROCDL
// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm='use-bare-ptr-memref-call-conv=0' | FileCheck %s --check-prefixes=CHECK,NVVM
gpu.module @kernel {
diff --git a/mlir/test/Conversion/GPUCommon/memref-arg-noalias-attrs.mlir b/mlir/test/Conversion/GPUCommon/memref-arg-noalias-attrs.mlir
index 33cdc3348e5137..d17214d1f2299b 100644
--- a/mlir/test/Conversion/GPUCommon/memref-arg-noalias-attrs.mlir
+++ b/mlir/test/Conversion/GPUCommon/memref-arg-noalias-attrs.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='use-bare-ptr-memref-call-conv=1' | FileCheck %s --check-prefixes=CHECK,ROCDL
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='chipset=gfx900 use-bare-ptr-memref-call-conv=1' | FileCheck %s --check-prefixes=CHECK,ROCDL
// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm='use-bare-ptr-memref-call-conv=1' | FileCheck %s --check-prefixes=CHECK,NVVM
gpu.module @kernel {
diff --git a/mlir/test/Conversion/GPUCommon/memref-arg-noalias-warning.mlir b/mlir/test/Conversion/GPUCommon/memref-arg-noalias-warning.mlir
index 793df7380d78bd..ab98be59a2c87e 100644
--- a/mlir/test/Conversion/GPUCommon/memref-arg-noalias-warning.mlir
+++ b/mlir/test/Conversion/GPUCommon/memref-arg-noalias-warning.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='use-bare-ptr-memref-call-conv=0' -verify-diagnostics
+// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='chipset=gfx900 use-bare-ptr-memref-call-conv=0' -verify-diagnostics
gpu.module @kernel {
// expected-warning @+1 {{Cannot copy noalias with non-bare pointers.}}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
index 1b904fa142bad3..3e3b43c6d4f493 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-gpu-to-rocdl='runtime=HIP' -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900 runtime=HIP' -split-input-file | FileCheck %s
gpu.module @test_module {
// CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL0:[A-Za-z0-9_]+]]("Hello, world\0A\00")
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
index 870f5c5016ecef..fa01801972d6a4 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-gpu-to-rocdl='runtime=OpenCL' | FileCheck %s
+// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900 runtime=OpenCL' | FileCheck %s
gpu.module @test_module {
// CHECK: llvm.mlir.global internal constant @[[$PRINT_GLOBAL:[A-Za-z0-9_]+]]("Hello: %d\0A\00") {addr_space = 4 : i32}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index 0d3e9f4ea2bf39..ca827e9acffe3b 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s
+// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900' -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900 index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s
// CHECK-LABEL: @test_module
// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
diff --git a/mlir/test/Conversion/GPUToROCDL/memref.mlir b/mlir/test/Conversion/GPUToROCDL/memref.mlir
index e645481c892308..debf899dd68742 100644
--- a/mlir/test/Conversion/GPUToROCDL/memref.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/memref.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900' -split-input-file | FileCheck %s
// RUN: mlir-opt %s \
-// RUN: -convert-gpu-to-rocdl='use-bare-ptr-memref-call-conv=true' \
+// RUN: -convert-gpu-to-rocdl='chipset=gfx900 use-bare-ptr-memref-call-conv=true' \
// RUN: -split-input-file \
// RUN: | FileCheck %s --check-prefix=BARE
diff --git a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
index 3c8f3b1d0cbf4b..edb75ee81224ef 100644
--- a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
+++ b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-opt %s \
// RUN: | mlir-opt -gpu-kernel-outlining \
-// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl),rocdl-attach-target{chip=%chip})' \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl))' \
// RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_rocm_runtime \
diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir
index d5e6e3757540b2..e8feeaa69c2907 100644
--- a/mlir/test/Integration/GPU/ROCM/printf.mlir
+++ b/mlir/test/Integration/GPU/ROCM/printf.mlir
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s \
-// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP}),rocdl-attach-target{chip=%chip})' \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP}))' \
// RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_rocm_runtime \
diff --git a/mlir/test/Integration/GPU/ROCM/two-modules.mlir b/mlir/test/Integration/GPU/ROCM/two-modules.mlir
index d49d3957abbe96..d20f71d162800c 100644
--- a/mlir/test/Integration/GPU/ROCM/two-modules.mlir
+++ b/mlir/test/Integration/GPU/ROCM/two-modules.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-opt %s \
// RUN: | mlir-opt -gpu-kernel-outlining \
-// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl),rocdl-attach-target{chip=%chip})' \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl))' \
// RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_rocm_runtime \
diff --git a/mlir/test/Integration/GPU/ROCM/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir
index 986d8239427e3c..0ac391cd5f8e3b 100644
--- a/mlir/test/Integration/GPU/ROCM/vecadd.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s \
// RUN: | mlir-opt -convert-scf-to-cf \
// RUN: | mlir-opt -gpu-kernel-outlining \
-// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true}),rocdl-attach-target{chip=%chip})' \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true}))' \
// RUN: | mlir-opt -gpu-to-llvm=use-bare-pointers-for-kernels=true -reconcile-unrealized-casts -gpu-module-to-binary \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_rocm_runtime \
diff --git a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
index 575d967dcc9a23..417f67e64669ed 100644
--- a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s \
// RUN: | mlir-opt -convert-scf-to-cf \
// RUN: | mlir-opt -gpu-kernel-outlining \
-// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32}),rocdl-attach-target{chip=%chip})' \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl{index-bitwidth=32}))' \
// RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_rocm_runtime \
>From 3e3d2973ad7a39ee1dc98d2342d863104cde4a08 Mon Sep 17 00:00:00 2001
From: Umang Yadav <umayadav at amd.com>
Date: Wed, 2 Oct 2024 13:07:55 +0000
Subject: [PATCH 2/5] Add test for reading attached target
---
.../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 9 +++---
.../GPUToROCDL/gpu-to-rocdl-infer-target.mlir | 30 +++++++++++++++++++
2 files changed, 34 insertions(+), 5 deletions(-)
create mode 100644 mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-infer-target.mlir
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 83607d2c46a0f4..d1adcc4c8e64a4 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -222,13 +222,12 @@ struct LowerGpuOpsToROCDLOpsPass
ArrayAttr targets = m.getTargetsAttr();
if (chipset == "infer") {
if (!targets) {
- emitError(UnknownLoc::get(ctx),
- "ROCDLTargetAttr is empty on GPU module");
+ m->emitError("ROCDLTargetAttr is empty on GPU module");
return signalPassFailure();
}
if (targets.size() != 1) {
- emitError(UnknownLoc::get(ctx), "ROCDLTargetAttrs has more specified "
- "more than one gpu-arch on GPU module");
+ m->emitError("ROCDLTargetAttrs has more specified more than one "
+ "gpu-arch on GPU module");
return signalPassFailure();
}
const ROCDL::ROCDLTargetAttr targetAttr =
@@ -238,7 +237,7 @@ struct LowerGpuOpsToROCDLOpsPass
FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
if (failed(maybeChipset)) {
- emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
+ m->emitError("Invalid chipset name: " + chipset);
return signalPassFailure();
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-infer-target.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-infer-target.mlir
new file mode 100644
index 00000000000000..83e9470ec1b532
--- /dev/null
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-infer-target.mlir
@@ -0,0 +1,30 @@
+// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file --verify-diagnostics | FileCheck --check-prefix=CHECK_TARGET %s
+
+// CHECK_TARGET: @test_module [#rocdl.target<O = 3, chip = "gfx90a">] attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"} {
+gpu.module @test_module [#rocdl.target<O = 3, chip = "gfx90a">] {
+ // CHECK_TARGET-LABEL: @kernel_func
+ // CHECK_TARGET: attributes
+ // CHECK_TARGET: gpu.kernel
+ // CHECK_TARGET: rocdl.kernel
+ gpu.func @kernel_func() kernel {
+ gpu.return
+ }
+}
+
+// -----
+
+// expected-error@below {{ROCDLTargetAttr is empty on GPU module}}
+gpu.module @test_module {
+ gpu.func @kernel_func() kernel {
+ gpu.return
+ }
+}
+
+// -----
+
+// expected-error@below {{Invalid chipset name: gfx90a,gfx900}}
+gpu.module @test_module [#rocdl.target<O = 3, chip = "gfx90a,gfx900">] {
+ gpu.func @kernel_func() kernel {
+ gpu.return
+ }
+}
>From 10c279c21149091d3cec0915c2c817e2a096675b Mon Sep 17 00:00:00 2001
From: Umang Yadav <29876643+umangyadav at users.noreply.github.com>
Date: Fri, 4 Oct 2024 10:11:01 -0400
Subject: [PATCH 3/5] Update
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
Co-authored-by: Fabian Mora <fmora.dev at gmail.com>
---
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index d1adcc4c8e64a4..eab1ae559d45a9 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -237,7 +237,7 @@ struct LowerGpuOpsToROCDLOpsPass
FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
if (failed(maybeChipset)) {
- m->emitError("Invalid chipset name: " + chipset);
+ m->emitError("invalid chipset name: " + chipset);
return signalPassFailure();
}
>From 02871b4d68a69054df93712b1a414a2c773576d8 Mon Sep 17 00:00:00 2001
From: Umang Yadav <29876643+umangyadav at users.noreply.github.com>
Date: Fri, 4 Oct 2024 10:11:10 -0400
Subject: [PATCH 4/5] Update
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
Co-authored-by: Fabian Mora <fmora.dev at gmail.com>
---
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index eab1ae559d45a9..8c6b7798c0bfd9 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -222,7 +222,7 @@ struct LowerGpuOpsToROCDLOpsPass
ArrayAttr targets = m.getTargetsAttr();
if (chipset == "infer") {
if (!targets) {
- m->emitError("ROCDLTargetAttr is empty on GPU module");
+ m->emitError("there are no target attributes to infer");
return signalPassFailure();
}
if (targets.size() != 1) {
>From b4ee991224d781e78c29e9eba921dbf244c4bf54 Mon Sep 17 00:00:00 2001
From: Umang Yadav <29876643+umangyadav at users.noreply.github.com>
Date: Fri, 4 Oct 2024 10:11:20 -0400
Subject: [PATCH 5/5] Update
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
Co-authored-by: Fabian Mora <fmora.dev at gmail.com>
---
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 8c6b7798c0bfd9..7d98be771105bf 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -219,8 +219,8 @@ struct LowerGpuOpsToROCDLOpsPass
gpu::GPUModuleOp m = getOperation();
MLIRContext *ctx = m.getContext();
- ArrayAttr targets = m.getTargetsAttr();
if (chipset == "infer") {
+ ArrayAttr targets = m.getTargetsAttr();
if (!targets) {
m->emitError("there are no target attributes to infer");
return signalPassFailure();
More information about the Mlir-commits
mailing list