[Mlir-commits] [mlir] bd6568c - [MLIR][GPU] Add gpu.cluster_dim_blocks and gpu.cluster_block_id Ops (#95245)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Jun 13 22:05:39 PDT 2024
Author: Pradeep Kumar
Date: 2024-06-14T10:35:35+05:30
New Revision: bd6568c98a50a180eabc41e9df5b896b7518c587
URL: https://github.com/llvm/llvm-project/commit/bd6568c98a50a180eabc41e9df5b896b7518c587
DIFF: https://github.com/llvm/llvm-project/commit/bd6568c98a50a180eabc41e9df5b896b7518c587.diff
LOG: [MLIR][GPU] Add gpu.cluster_dim_blocks and gpu.cluster_block_id Ops (#95245)
This commit adds the `gpu.cluster_dim_blocks` and
`gpu.cluster_block_id` Ops, which represent the number of blocks per cluster
and the block id inside a cluster, respectively. It also fixes the description
of the `gpu.cluster_dim` Op and updates the `cga_cluster.mlir` test file to use
`gpu.cluster_dim_blocks`.
Co-authored-by: pradeepku <pradeepku at nvidia.com>
Co-authored-by: Guray Ozen <guray.ozen at gmail.com>
Added:
Modified:
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index eb81b6469746f..9c5f7ecd8cbe8 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -70,7 +70,7 @@ class GPU_IndexOp<string mnemonic, list<Trait> traits = []> :
def GPU_ClusterDimOp : GPU_IndexOp<"cluster_dim"> {
let description = [{
- Returns the number of thread blocks in the cluster along
+ Returns the number of cluster identifiers per grid along
the x, y, or z `dimension`.
Example:
@@ -81,6 +81,19 @@ def GPU_ClusterDimOp : GPU_IndexOp<"cluster_dim"> {
}];
}
+def GPU_ClusterDimBlocksOp : GPU_IndexOp<"cluster_dim_blocks"> {
+ let description = [{
+ Returns the number of thread blocks in the cluster along
+ the x, y, or z `dimension`.
+
+ Example:
+
+ ```mlir
+ %cDimBlocksX = gpu.cluster_dim_blocks x
+ ```
+ }];
+}
+
def GPU_ClusterIdOp : GPU_IndexOp<"cluster_id"> {
let description = [{
Returns the cluster id, i.e. the index of the current cluster within the
@@ -94,6 +107,18 @@ def GPU_ClusterIdOp : GPU_IndexOp<"cluster_id"> {
}];
}
+def GPU_ClusterBlockIdOp : GPU_IndexOp<"cluster_block_id"> {
+ let description = [{
+ Returns the block id within the cluster along the x, y, or z `dimension`.
+
+ Example:
+
+ ```mlir
+ %cBlockIdY = gpu.cluster_block_id y
+ ```
+ }];
+}
+
def GPU_BlockDimOp : GPU_IndexOp<"block_dim"> {
let description = [{
Returns the number of threads in the thread block (aka the block size) along
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 4daeeab093863..4d48b3de7a57e 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -160,9 +160,9 @@ def NVVM_ClusterDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nclusterid.z">;
def NVVM_BlockInClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.x">;
def NVVM_BlockInClusterIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.y">;
def NVVM_BlockInClusterIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.z">;
-def NVVM_GridInClusterDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.x">;
-def NVVM_GridInClusterDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.y">;
-def NVVM_GridInClusterDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.z">;
+def NVVM_ClusterDimBlocksXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.x">;
+def NVVM_ClusterDimBlocksYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.y">;
+def NVVM_ClusterDimBlocksZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.z">;
//===----------------------------------------------------------------------===//
// CTA index and across Cluster dimensions
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index b95fba20a00cb..fdd65e40e9064 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -342,8 +342,14 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
NVVM::BlockDimYOp, NVVM::BlockDimZOp>,
GPUIndexIntrinsicOpLowering<gpu::ClusterIdOp, NVVM::ClusterIdXOp,
NVVM::ClusterIdYOp, NVVM::ClusterIdZOp>,
+ GPUIndexIntrinsicOpLowering<
+ gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp,
+ NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>,
GPUIndexIntrinsicOpLowering<gpu::ClusterDimOp, NVVM::ClusterDimXOp,
NVVM::ClusterDimYOp, NVVM::ClusterDimZOp>,
+ GPUIndexIntrinsicOpLowering<
+ gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp,
+ NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>,
GPUIndexIntrinsicOpLowering<gpu::BlockIdOp, NVVM::BlockIdXOp,
NVVM::BlockIdYOp, NVVM::BlockIdZOp>,
GPUIndexIntrinsicOpLowering<gpu::GridDimOp, NVVM::GridDimXOp,
diff --git a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
index 69017efb9a0e6..46b85db8b5431 100644
--- a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
+++ b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
@@ -86,6 +86,12 @@ static std::optional<uint64_t> getKnownLaunchDim(Op op, LaunchDims type) {
void ClusterDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
SetIntRangeFn setResultRange) {
+ uint64_t max = APInt::getMaxValue(64).getZExtValue();
+ setResultRange(getResult(), getIndexRange(1, max));
+}
+
+void ClusterDimBlocksOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+ SetIntRangeFn setResultRange) {
setResultRange(getResult(), getIndexRange(1, kMaxClusterDim));
}
@@ -95,6 +101,12 @@ void ClusterIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
setResultRange(getResult(), getIndexRange(0, max - 1ULL));
}
+void ClusterBlockIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+ SetIntRangeFn setResultRange) {
+ uint64_t max = kMaxClusterDim;
+ setResultRange(getResult(), getIndexRange(0, max - 1ULL));
+}
+
void BlockDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
SetIntRangeFn setResultRange) {
std::optional<uint64_t> knownVal =
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
index 025282ec0d688..5c11d80178f72 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
@@ -22,9 +22,9 @@ module attributes {gpu.container_module} {
%cidX = gpu.cluster_id x
%cidY = gpu.cluster_id y
%cidZ = gpu.cluster_id z
- %cdimX = gpu.cluster_dim x
- %cdimY = gpu.cluster_dim y
- %cdimZ = gpu.cluster_dim z
+ %cdimX = gpu.cluster_dim_blocks x
+ %cdimY = gpu.cluster_dim_blocks y
+ %cdimZ = gpu.cluster_dim_blocks z
%bidX = gpu.block_id x
%bidY = gpu.block_id y
%bidZ = gpu.block_id z
More information about the Mlir-commits
mailing list