[flang-commits] [flang] [flang][cuda] Fix lowering of cuf kernel with unstructured nested construct (PR #107149)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Tue Sep 3 13:17:22 PDT 2024


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/107149

Lowering was crashing when a cuf kernel contains an unstructured construct. Blocks created by the PFT need to be re-created inside of the operation, as is done for OpenACC constructs.

>From 9b897c94a1875fb06cb4405b2d424aae517bf08d Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 3 Sep 2024 13:15:09 -0700
Subject: [PATCH] [flang][cuda] Fix lowering of cuf kernel with unstructured
 nested construct

---
 flang/lib/Lower/Bridge.cpp                    |  8 +++++++-
 .../Lower/CUDA/cuda-kernel-loop-directive.cuf | 20 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e5ccf659c3f8ed..1f2724290b8852 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Lower/Bridge.h"
+#include "DirectivesCommon.h"
 #include "flang/Common/Version.h"
 #include "flang/Lower/Allocatable.h"
 #include "flang/Lower/CallInterface.h"
@@ -2999,6 +3000,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     mlir::Block &b = op.getRegion().back();
     builder->setInsertionPointToStart(&b);
 
+    Fortran::lower::pft::Evaluation *crtEval = &getEval();
+    if (crtEval->lowerAsUnstructured())
+      Fortran::lower::createEmptyRegionBlocks<fir::FirEndOp>(
+          *builder, crtEval->getNestedEvaluations());
+    builder->setInsertionPointToStart(&b);
+
     for (auto [arg, value] : llvm::zip(
              op.getLoopRegions().front()->front().getArguments(), ivValues)) {
       mlir::Value convArg =
@@ -3006,7 +3013,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       builder->create<fir::StoreOp>(loc, convArg, value);
     }
 
-    Fortran::lower::pft::Evaluation *crtEval = &getEval();
     if (crtEval->lowerAsStructured()) {
       crtEval = &crtEval->getFirstNestedEvaluation();
       for (int64_t i = 1; i < nestedLoops; i++)
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index ba5d390df4785e..aac569b6eb35bd 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -78,3 +78,23 @@ end
 ! CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]]#0 : !fir.ref<i64>
 ! CHECK: %[[STREAM_I32:.*]] = fir.convert %[[STREAM_LOAD]] : (i64) -> i32
 ! CHECK: cuf.kernel<<<*, *, stream = %[[STREAM_I32]]>>>
+
+
+! Test lowering with unstructured construct inside.
+subroutine sub2(m,a,b)
+  integer :: m
+  real, device :: a(m,m), b(m)
+  integer :: i,j
+  !$cuf kernel do<<<*,*>>>
+  
+  do j = 1, m
+    i = 1
+    do while (a(i,j).eq.0)
+      i = i + 1
+    end do
+    b(j) = i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub2
+! CHECK: cuf.kernel



More information about the flang-commits mailing list