[flang-commits] [flang] [flang][cuda] Fix lowering of cuf kernel with unstructured nested construct (PR #107149)

via flang-commits flang-commits at lists.llvm.org
Tue Sep 3 13:17:57 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

<details>
<summary>Changes</summary>

Lowering was crashing when a cuf kernel contained an unstructured construct. Blocks created by the PFT need to be re-created inside the operation, as is done for OpenACC constructs.

---
Full diff: https://github.com/llvm/llvm-project/pull/107149.diff


2 Files Affected:

- (modified) flang/lib/Lower/Bridge.cpp (+7-1) 
- (modified) flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf (+20) 


``````````diff
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e5ccf659c3f8ed..1f2724290b8852 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Lower/Bridge.h"
+#include "DirectivesCommon.h"
 #include "flang/Common/Version.h"
 #include "flang/Lower/Allocatable.h"
 #include "flang/Lower/CallInterface.h"
@@ -2999,6 +3000,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     mlir::Block &b = op.getRegion().back();
     builder->setInsertionPointToStart(&b);
 
+    Fortran::lower::pft::Evaluation *crtEval = &getEval();
+    if (crtEval->lowerAsUnstructured())
+      Fortran::lower::createEmptyRegionBlocks<fir::FirEndOp>(
+          *builder, crtEval->getNestedEvaluations());
+    builder->setInsertionPointToStart(&b);
+
     for (auto [arg, value] : llvm::zip(
              op.getLoopRegions().front()->front().getArguments(), ivValues)) {
       mlir::Value convArg =
@@ -3006,7 +3013,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       builder->create<fir::StoreOp>(loc, convArg, value);
     }
 
-    Fortran::lower::pft::Evaluation *crtEval = &getEval();
     if (crtEval->lowerAsStructured()) {
       crtEval = &crtEval->getFirstNestedEvaluation();
       for (int64_t i = 1; i < nestedLoops; i++)
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index ba5d390df4785e..aac569b6eb35bd 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -78,3 +78,23 @@ end
 ! CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]]#0 : !fir.ref<i64>
 ! CHECK: %[[STREAM_I32:.*]] = fir.convert %[[STREAM_LOAD]] : (i64) -> i32
 ! CHECK: cuf.kernel<<<*, *, stream = %[[STREAM_I32]]>>>
+
+
+! Test lowering with unstructured construct inside.
+subroutine sub2(m,a,b)
+  integer :: m
+  real, device :: a(m,m), b(m)
+  integer :: i,j
+  !$cuf kernel do<<<*,*>>>
+  
+  do j = 1, m
+    i = 1
+    do while (a(i,j).eq.0)
+      i = i + 1
+    end do
+    b(j) = i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub2
+! CHECK: cuf.kernel

``````````

</details>


https://github.com/llvm/llvm-project/pull/107149


More information about the flang-commits mailing list