[flang-commits] [flang] [flang][cuda] Handle lowering of stars in cuf kernel launch parameters (PR #85695)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Mon Mar 18 17:51:47 PDT 2024


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/85695

>From 2d1feaf12b3fb4450d19183a5902366c7aa62488 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 18 Mar 2024 13:24:44 -0700
Subject: [PATCH 1/3] [flang][cuda] Handle stars in cuf kernel launch paramters

---
 flang/lib/Lower/Bridge.cpp                    | 45 +++++++++++++------
 .../Lower/CUDA/cuda-kernel-loop-directive.cuf | 19 ++++++--
 2 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 650ec5db2d0ccb..1b9a8a867b0804 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2529,23 +2529,42 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     const std::optional<Fortran::parser::ScalarIntExpr> &stream =
         std::get<3>(dir.t);
 
+    auto isOnlyStars =
+        [&](const std::list<Fortran::parser::CUFKernelDoConstruct::StarOrExpr>
+                &list) -> bool {
+      for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
+           list) {
+        if (expr.v)
+          return false;
+      }
+      return true;
+    };
+
+    mlir::Value zero =
+        builder->createIntegerConstant(loc, builder->getI32Type(), 0);
+
     llvm::SmallVector<mlir::Value> gridValues;
-    for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr : grid) {
-      if (expr.v) {
-        gridValues.push_back(fir::getBase(
-            genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
-      } else {
-        // TODO: '*'
+    if (!isOnlyStars(grid)) {
+      for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
+           grid) {
+        if (expr.v) {
+          gridValues.push_back(fir::getBase(
+              genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
+        } else {
+          gridValues.push_back(zero);
+        }
       }
     }
     llvm::SmallVector<mlir::Value> blockValues;
-    for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
-         block) {
-      if (expr.v) {
-        blockValues.push_back(fir::getBase(
-            genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
-      } else {
-        // TODO: '*'
+    if (!isOnlyStars(block)) {
+      for (const Fortran::parser::CUFKernelDoConstruct::StarOrExpr &expr :
+           block) {
+        if (expr.v) {
+          blockValues.push_back(fir::getBase(
+              genExprValue(*Fortran::semantics::GetExpr(*expr.v), stmtCtx)));
+        } else {
+          blockValues.push_back(zero);
+        }
       }
     }
     mlir::Value streamValue;
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index c017561447f85d..6179e609db383c 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -42,7 +42,20 @@ subroutine sub1()
 ! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
 ! CHECK: {n = 2 : i64}
 
-! TODO: lowering for these cases
-! !$cuf kernel do(2) <<< (1,*), (256,1) >>>
-! !$cuf kernel do(2) <<< (*,*), (32,4) >>>
+  !$cuf kernel do(2) <<< (1,*), (256,1) >>>
+  do i = 1, n
+    do j = 1, n
+      c(i,j) = c(i,j) * d(i,j)
+    end do
+  end do
+! CHECK: fir.cuda_kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index)  step (%{{.*}}, %{{.*}} : index, index)
+
+!$cuf kernel do(2) <<< (*,*), (32,4) >>>
+  do i = 1, n
+    do j = 1, n
+      c(i,j) = c(i,j) * d(i,j)
+    end do
+  end do
+
+! CHECK: fir.cuda_kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index)  step (%{{.*}}, %{{.*}} : index, index)
 end

>From 7be93fb459814f3a13d1c13099ff19c472cc2876 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 18 Mar 2024 17:48:00 -0700
Subject: [PATCH 2/3] Add comment about zero constant value

---
 flang/include/flang/Optimizer/Dialect/FIROps.td | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index f4792637f481c0..f405cf39951387 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -3131,8 +3131,18 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> {
 def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments,
     DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
 
+  let description = [{
+    Represent the CUDA Fortran kernel directive. The operation is a loop like
+    operation that represents the iteration range of the embedded loop nest.
+
+    When grid or block variadic operands are empty, a `*` only syntax was used
+    in the Fortran code.
+    If the `*` is mixed with values for either grid or block, these are
+    represented by a 0 constant value.
+  }];
+
   let arguments = (ins
-    Variadic<I32>:$grid, // empty means `*`
+    Variadic<I32>:$grid, // empty means `*`. 
     Variadic<I32>:$block, // empty means `*`
     Optional<I32>:$stream,
     Variadic<Index>:$lowerbound,

>From 3e02f6ff436c6b14f199e90e8f11a472387362bb Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 18 Mar 2024 17:51:36 -0700
Subject: [PATCH 3/3] Remove added dot

---
 flang/include/flang/Optimizer/Dialect/FIROps.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index f405cf39951387..6e520d111701f0 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -3142,7 +3142,7 @@ def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments,
   }];
 
   let arguments = (ins
-    Variadic<I32>:$grid, // empty means `*`. 
+    Variadic<I32>:$grid, // empty means `*`
     Variadic<I32>:$block, // empty means `*`
     Optional<I32>:$stream,
     Variadic<Index>:$lowerbound,



More information about the flang-commits mailing list