[flang-commits] [flang] f724540 - [flang][cuda] Fix retrieval of nested evaluation in cuf kernel (#91298)
via flang-commits
flang-commits at lists.llvm.org
Tue May 7 08:29:24 PDT 2024
Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-05-07T08:29:21-07:00
New Revision: f72454086af9d3f91a86e10dc1923849c5f670a8
URL: https://github.com/llvm/llvm-project/commit/f72454086af9d3f91a86e10dc1923849c5f670a8
DIFF: https://github.com/llvm/llvm-project/commit/f72454086af9d3f91a86e10dc1923849c5f670a8.diff
LOG: [flang][cuda] Fix retrieval of nested evaluation in cuf kernel (#91298)
`loopEval` was declared inside the for loop that iterates over the nested
loops, so the same loop control was retrieved again for each level of the loop
nest. Make sure we are iterating over all the loops by putting
the `loopEval` declaration outside of the for loop.
Added:
Modified:
flang/lib/Lower/Bridge.cpp
flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
Removed:
################################################################################
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index ae8679afc603f..b0fc26332651e 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2585,11 +2585,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
llvm::SmallVector<mlir::Type> ivTypes;
llvm::SmallVector<mlir::Location> ivLocs;
llvm::SmallVector<mlir::Value> ivValues;
+ Fortran::lower::pft::Evaluation *loopEval =
+ &getEval().getFirstNestedEvaluation();
for (unsigned i = 0; i < nestedLoops; ++i) {
const Fortran::parser::LoopControl *loopControl;
- Fortran::lower::pft::Evaluation *loopEval =
- &getEval().getFirstNestedEvaluation();
-
mlir::Location crtLoc = loc;
if (i == 0) {
loopControl = &*outerDoConstruct->GetLoopControl();
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index d80542f76c92e..e1cc35772618a 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -11,7 +11,7 @@ subroutine sub1()
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[IV:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-
+! CHECK: %[[IV_J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!$cuf kernel do <<< 1, 2 >>>
do i = 1, n
a(i) = a(i) * b(i)
@@ -41,7 +41,11 @@ subroutine sub1()
end do
end do
-! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
+! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
+! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
+! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
+! CHECK: %[[ARG1_I32:.*]] = fir.convert %[[ARG1]] : (index) -> i32
+! CHECK: fir.store %[[ARG1_I32]] to %[[IV_J]]#1 : !fir.ref<i32>
! CHECK: {n = 2 : i64}
!$cuf kernel do(2) <<< (1,*), (256,1) >>>
More information about the flang-commits
mailing list