[flang-commits] [flang] f6a2a55 - [flang][cuda] Handle lowering of stars in cuf kernel launch parameters (#85695)
via flang-commits
flang-commits at lists.llvm.org
Mon Mar 18 19:46:15 PDT 2024
Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-03-18T19:46:11-07:00
New Revision: f6a2a55ba1fe1a4b720b8760704785d12137b35e
URL: https://github.com/llvm/llvm-project/commit/f6a2a55ba1fe1a4b720b8760704785d12137b35e
DIFF: https://github.com/llvm/llvm-project/commit/f6a2a55ba1fe1a4b720b8760704785d12137b35e.diff
LOG: [flang][cuda] Handle lowering of stars in cuf kernel launch parameters (#85695)
Parsing of the cuf kernel loop directive has been updated to handle
variants with the * syntax. This patch updates the lowering to make use
of them.
- If the grid or block syntax uses only stars then the operation
variadic operand remains empty.
- If there is values and stars, then stars are represented as a zero
constant value.
Added:
Modified:
flang/include/flang/Optimizer/Dialect/FIROps.td
flang/lib/Lower/Bridge.cpp
flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index f4792637f481c0..6e520d111701f0 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -3131,6 +3131,16 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> {
def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments,
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
+ let description = [{
+ Represent the CUDA Fortran kernel directive. The operation is a loop like
+ operation that represents the iteration range of the embedded loop nest.
+
+ When grid or block variadic operands are empty, a `*` only syntax was used
+ in the Fortran code.
+ If the `*` is mixed with values for either grid or block, these are
+ represented by a 0 constant value.
+ }];
+
let arguments = (ins
Variadic<I32>:$grid, // empty means `*`
Variadic<I32>:$block, // empty means `*`
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6c32c537198c67..c3cb9ba6a47e3d 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2529,23 +2529,42 @@ class FirConverter : public Fortran::lower::AbstractConverter {
const std::optional<Fortran::parser::ScalarIntExpr> &stream =
std::get<3>(dir.t);
+ auto isOnlyStars =
+ [&](const std::list<Fortran::parser::CUFKernelDoConstruct::StarOrExpr>
+ &list) -> bool {
+ for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
+ list) {
+ if (expr.v)
+ return false;
+ }
+ return true;
+ };
+
+ mlir::Value zero =
+ builder->createIntegerConstant(loc, builder->getI32Type(), 0);
+
llvm::SmallVector<mlir::Value> gridValues;
- for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr : grid) {
- if (expr.v) {
- gridValues.push_back(fir::getBase(
- genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
- } else {
- // TODO: '*'
+ if (!isOnlyStars(grid)) {
+ for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
+ grid) {
+ if (expr.v) {
+ gridValues.push_back(fir::getBase(
+ genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
+ } else {
+ gridValues.push_back(zero);
+ }
}
}
llvm::SmallVector<mlir::Value> blockValues;
- for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
- block) {
- if (expr.v) {
- blockValues.push_back(fir::getBase(
- genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
- } else {
- // TODO: '*'
+ if (!isOnlyStars(block)) {
+ for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
+ block) {
+ if (expr.v) {
+ blockValues.push_back(fir::getBase(
+ genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
+ } else {
+ blockValues.push_back(zero);
+ }
}
}
mlir::Value streamValue;
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index c017561447f85d..6179e609db383c 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -42,7 +42,20 @@ subroutine sub1()
! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: {n = 2 : i64}
-! TODO: lowering for these cases
-! !$cuf kernel do(2) <<< (1,*), (256,1) >>>
-! !$cuf kernel do(2) <<< (*,*), (32,4) >>>
+ !$cuf kernel do(2) <<< (1,*), (256,1) >>>
+ do i = 1, n
+ do j = 1, n
+ c(i,j) = c(i,j) * d(i,j)
+ end do
+ end do
+! CHECK: fir.cuda_kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
+
+!$cuf kernel do(2) <<< (*,*), (32,4) >>>
+ do i = 1, n
+ do j = 1, n
+ c(i,j) = c(i,j) * d(i,j)
+ end do
+ end do
+
+! CHECK: fir.cuda_kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
end
More information about the flang-commits
mailing list