[Mlir-commits] [mlir] [MLIR][NVGPU] Move max threads/blocks size to dialect (NFC) (PR #124454)
Guray Ozen
llvmlistbot at llvm.org
Sun Jan 26 00:28:55 PST 2025
https://github.com/grypp updated https://github.com/llvm/llvm-project/pull/124454
>From 6340c361c7b32888943a9f36bf3beded8240f236 Mon Sep 17 00:00:00 2001
From: Guray Ozen <gozen at nvidia.com>
Date: Sun, 26 Jan 2025 09:28:44 +0100
Subject: [PATCH] [MLIR][NVGPU] Move max threads/blocks size to dialect (NFC)
This PR moves maximum number of threads in a block and block in a grid to nvgpu dialect to avoid replicated code.
The limits are defined here:
https://docs.nvidia.com/cuda/cuda-c-programming-guide/#features-and-technical-specifications-technical-specifications-per-compute-capability
---
.../mlir/Dialect/NVGPU/IR/NVGPUDialect.h | 12 +++++++++
mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 25 +++++++------------
2 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h
index aad2ac6f4dd2b4..db4c63b3390eb7 100644
--- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h
+++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h
@@ -22,8 +22,20 @@
#include "mlir/Dialect/NVGPU/IR/NVGPUEnums.h.inc"
+// Maximum warp size
constexpr int kWarpSize = 32;
+// Maximum number of threads in a block and block in a grid
+// https://docs.nvidia.com/cuda/cuda-c-programming-guide/#features-and-technical-specifications-technical-specifications-per-compute-capability
+constexpr int kMaxTotalBlockdim = 1024;
+constexpr int kMaxBlockdimx = 1024;
+constexpr int kMaxBlockdimy = 1024;
+constexpr int kMaxBlockdimz = 64;
+constexpr int kMaxTotalGriddim = 2147483647;
+constexpr int kMaxGriddimx = 2147483647;
+constexpr int kMaxGriddimy = 65535;
+constexpr int kMaxGriddimz = 65535;
+
/// M size of wgmma.mma_async instruction
constexpr int kWgmmaSizeM = 64;
diff --git a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
index f4d36129bae776..f5a6d0893d9a94 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
@@ -14,6 +14,7 @@
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
@@ -237,25 +238,17 @@ DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp,
std::optional<int64_t> blockDimZ) {
// TODO: pass a configuration object to set the limits properly.
- static constexpr int maxTotalBlockdim = 1024;
- static constexpr int maxBlockdimx = 1024;
- static constexpr int maxBlockdimy = 1024;
- static constexpr int maxBlockdimz = 64;
- static constexpr int maxTotalGriddim = 2147483647;
- static constexpr int maxGriddimx = 2147483647;
- static constexpr int maxGriddimy = 65535;
- static constexpr int maxGriddimz = 65535;
if ((blockDimX.value_or(1) * blockDimY.value_or(1) * blockDimZ.value_or(1)) >
- maxTotalBlockdim ||
+ kMaxTotalBlockdim ||
(gridDimX.value_or(1) * gridDimY.value_or(1) * gridDimZ.value_or(1)) >
- maxTotalGriddim ||
- blockDimX.value_or(1) > maxBlockdimx ||
- blockDimY.value_or(1) > maxBlockdimy ||
- blockDimZ.value_or(1) > maxBlockdimz ||
- gridDimY.value_or(1) > maxGriddimy ||
- gridDimZ.value_or(1) > maxGriddimz ||
- gridDimX.value_or(1) > maxGriddimx) {
+ kMaxTotalGriddim ||
+ blockDimX.value_or(1) > kMaxBlockdimx ||
+ blockDimY.value_or(1) > kMaxBlockdimy ||
+ blockDimZ.value_or(1) > kMaxBlockdimz ||
+ gridDimY.value_or(1) > kMaxGriddimy ||
+ gridDimZ.value_or(1) > kMaxGriddimz ||
+ gridDimX.value_or(1) > kMaxGriddimx) {
return transformOp.emitSilenceableError()
<< "Trying to launch a GPU kernel with grid_dims = ("
<< gridDimX.value_or(1) << ", " << gridDimY.value_or(1) << ", "
More information about the Mlir-commits
mailing list