[flang-commits] [flang] [flang][cuda] Fix retrieval of nested evaluation in cuf kernel (PR #91298)

Mon May 6 21:30:42 PDT 2024

https://github.com/clementval created https://github.com/llvm/llvm-project/pull/91298

`loopEval` was declared inside the for loop to iterate over the nested loops so the same loop control was redeclared for each level of the loop nest. Make sure we are iterating over all the loops by putting `loopEval` declaration ouside of the for loop. 



>From 32cea2fe0b4ec000dfe5e3675328bbe0dbacb3db Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 6 May 2024 21:28:06 -0700
Subject: [PATCH] [flang][cuda] Fix retrieval of nested evaluation in cuf
 kernel

---
 flang/lib/Lower/Bridge.cpp                           | 5 ++---
 flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf | 8 ++++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index ae8679afc603f..18897699c0574 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2585,11 +2585,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     llvm::SmallVector<mlir::Type> ivTypes;
     llvm::SmallVector<mlir::Location> ivLocs;
     llvm::SmallVector<mlir::Value> ivValues;
+    Fortran::lower::pft::Evaluation *loopEval =
+          &getEval().getFirstNestedEvaluation();
     for (unsigned i = 0; i < nestedLoops; ++i) {
       const Fortran::parser::LoopControl *loopControl;
-      Fortran::lower::pft::Evaluation *loopEval =
-          &getEval().getFirstNestedEvaluation();
-
       mlir::Location crtLoc = loc;
       if (i == 0) {
         loopControl = &*outerDoConstruct->GetLoopControl();
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index d80542f76c92e..e1cc35772618a 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -11,7 +11,7 @@ subroutine sub1()
 
 ! CHECK-LABEL: func.func @_QPsub1()
 ! CHECK: %[[IV:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
+! CHECK: %[[IV_J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   !$cuf kernel do <<< 1, 2 >>>
   do i = 1, n
     a(i) = a(i) * b(i)
@@ -41,7 +41,11 @@ subroutine sub1()
     end do
   end do
 
-! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
+! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
+! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
+! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
+! CHECK: %[[ARG1_I32:.*]] = fir.convert %[[ARG1]] : (index) -> i32
+! CHECK: fir.store %[[ARG1_I32]] to %[[IV_J]]#1 : !fir.ref<i32>
 ! CHECK: {n = 2 : i64}
 
   !$cuf kernel do(2) <<< (1,*), (256,1) >>>