[flang-commits] [flang] 0b700f2 - [flang][cuda] Add entry point to launch global function with cluster_dims (#113958)
via flang-commits
flang-commits at lists.llvm.org
Tue Oct 29 10:01:52 PDT 2024
Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-10-29T10:01:49-07:00
New Revision: 0b700f23335e9206e1e460a477df2103ce3c186d
URL: https://github.com/llvm/llvm-project/commit/0b700f23335e9206e1e460a477df2103ce3c186d
DIFF: https://github.com/llvm/llvm-project/commit/0b700f23335e9206e1e460a477df2103ce3c186d.diff
LOG: [flang][cuda] Add entry point to launch global function with cluster_dims (#113958)
Added:
Modified:
flang/include/flang/Runtime/CUDA/kernel.h
flang/runtime/CUDA/kernel.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Runtime/CUDA/kernel.h b/flang/include/flang/Runtime/CUDA/kernel.h
index cf07d874a082c0..85afda09e347ae 100644
--- a/flang/include/flang/Runtime/CUDA/kernel.h
+++ b/flang/include/flang/Runtime/CUDA/kernel.h
@@ -15,13 +15,19 @@
extern "C" {
-// This function uses intptr_t instead of CUDA's unsigned int to match
+// These functions use intptr_t instead of CUDA's unsigned int to match
// the type of MLIR's index type. This avoids the need for casts in the
// generated MLIR code.
+
void RTDEF(CUFLaunchKernel)(const void *kernelName, intptr_t gridX,
intptr_t gridY, intptr_t gridZ, intptr_t blockX, intptr_t blockY,
intptr_t blockZ, int32_t smem, void **params, void **extra);
+void RTDEF(CUFLaunchClusterKernel)(const void *kernelName, intptr_t clusterX,
+ intptr_t clusterY, intptr_t clusterZ, intptr_t gridX, intptr_t gridY,
+ intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ,
+ int32_t smem, void **params, void **extra);
+
} // extern "C"
#endif // FORTRAN_RUNTIME_CUDA_KERNEL_H_
diff --git a/flang/runtime/CUDA/kernel.cpp b/flang/runtime/CUDA/kernel.cpp
index f81153a1af4bc7..abb7ebb72e5923 100644
--- a/flang/runtime/CUDA/kernel.cpp
+++ b/flang/runtime/CUDA/kernel.cpp
@@ -25,9 +25,32 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY,
blockDim.x = blockX;
blockDim.y = blockY;
blockDim.z = blockZ;
- cudaStream_t stream = 0;
+ cudaStream_t stream = 0; // TODO stream managment
CUDA_REPORT_IF_ERROR(
cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, stream));
}
+void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
+ intptr_t clusterY, intptr_t clusterZ, intptr_t gridX, intptr_t gridY,
+ intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ,
+ int32_t smem, void **params, void **extra) {
+ cudaLaunchConfig_t config;
+ config.gridDim.x = gridX;
+ config.gridDim.y = gridY;
+ config.gridDim.z = gridZ;
+ config.blockDim.x = blockX;
+ config.blockDim.y = blockY;
+ config.blockDim.z = blockZ;
+ config.dynamicSmemBytes = smem;
+ config.stream = 0; // TODO stream managment
+ cudaLaunchAttribute launchAttr[1];
+ launchAttr[0].id = cudaLaunchAttributeClusterDimension;
+ launchAttr[0].val.clusterDim.x = clusterX;
+ launchAttr[0].val.clusterDim.y = clusterY;
+ launchAttr[0].val.clusterDim.z = clusterZ;
+ config.numAttrs = 1;
+ config.attrs = launchAttr;
+ CUDA_REPORT_IF_ERROR(cudaLaunchKernelExC(&config, kernel, params));
+}
+
} // extern "C"
More information about the flang-commits
mailing list