[flang-commits] [flang] [flang][cuda] Fix lowering of cuf kernel with unstructured nested construct (PR #107149)
via flang-commits
flang-commits at lists.llvm.org
Tue Sep 3 13:17:57 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
Author: Valentin Clement (バレンタイン クレメン) (clementval)
<details>
<summary>Changes</summary>
Lowering was crashing when a cuf kernel contained an unstructured construct. Blocks created by the PFT need to be re-created inside the operation, as is done for OpenACC constructs.
---
Full diff: https://github.com/llvm/llvm-project/pull/107149.diff
2 Files Affected:
- (modified) flang/lib/Lower/Bridge.cpp (+7-1)
- (modified) flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf (+20)
``````````diff
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e5ccf659c3f8ed..1f2724290b8852 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "flang/Lower/Bridge.h"
+#include "DirectivesCommon.h"
#include "flang/Common/Version.h"
#include "flang/Lower/Allocatable.h"
#include "flang/Lower/CallInterface.h"
@@ -2999,6 +3000,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
mlir::Block &b = op.getRegion().back();
builder->setInsertionPointToStart(&b);
+ Fortran::lower::pft::Evaluation *crtEval = &getEval();
+ if (crtEval->lowerAsUnstructured())
+ Fortran::lower::createEmptyRegionBlocks<fir::FirEndOp>(
+ *builder, crtEval->getNestedEvaluations());
+ builder->setInsertionPointToStart(&b);
+
for (auto [arg, value] : llvm::zip(
op.getLoopRegions().front()->front().getArguments(), ivValues)) {
mlir::Value convArg =
@@ -3006,7 +3013,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
builder->create<fir::StoreOp>(loc, convArg, value);
}
- Fortran::lower::pft::Evaluation *crtEval = &getEval();
if (crtEval->lowerAsStructured()) {
crtEval = &crtEval->getFirstNestedEvaluation();
for (int64_t i = 1; i < nestedLoops; i++)
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index ba5d390df4785e..aac569b6eb35bd 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -78,3 +78,23 @@ end
! CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]]#0 : !fir.ref<i64>
! CHECK: %[[STREAM_I32:.*]] = fir.convert %[[STREAM_LOAD]] : (i64) -> i32
! CHECK: cuf.kernel<<<*, *, stream = %[[STREAM_I32]]>>>
+
+
+! Test lowering with unstructured construct inside.
+subroutine sub2(m,a,b)
+ integer :: m
+ real, device :: a(m,m), b(m)
+ integer :: i,j
+ !$cuf kernel do<<<*,*>>>
+
+ do j = 1, m
+ i = 1
+ do while (a(i,j).eq.0)
+ i = i + 1
+ end do
+ b(j) = i
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub2
+! CHECK: cuf.kernel
``````````
</details>
https://github.com/llvm/llvm-project/pull/107149
More information about the flang-commits
mailing list