[flang-commits] [flang] 3163f7a - [flang][openacc] Add kernels construct lowering

Valentin Clement via flang-commits flang-commits at lists.llvm.org
Fri Apr 14 09:44:13 PDT 2023


Author: Valentin Clement
Date: 2023-04-14T09:44:07-07:00
New Revision: 3163f7ade2552c2eefd628f89e065eeeb5530915

URL: https://github.com/llvm/llvm-project/commit/3163f7ade2552c2eefd628f89e065eeeb5530915
DIFF: https://github.com/llvm/llvm-project/commit/3163f7ade2552c2eefd628f89e065eeeb5530915.diff

LOG: [flang][openacc] Add kernels construct lowering

Lower the kernels and kernels loop constructs from the parse tree to
acc dialect operations, reusing the existing compute construct lowering.

Depends on D148277

Reviewed By: PeteSteinfeld

Differential Revision: https://reviews.llvm.org/D148278

Added: 
    flang/test/Lower/OpenACC/acc-kernels-loop.f90
    flang/test/Lower/OpenACC/acc-kernels.f90

Modified: 
    flang/lib/Lower/OpenACC.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 8c5f11923e5ac..48ce1bae1e77f 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -551,14 +551,15 @@ createComputeOp(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<int32_t, 8> operandSegments;
   addOperand(operands, operandSegments, async);
   addOperands(operands, operandSegments, waitOperands);
-  if constexpr (std::is_same_v<Op, mlir::acc::ParallelOp>) {
+  if constexpr (!std::is_same_v<Op, mlir::acc::SerialOp>) {
     addOperand(operands, operandSegments, numGangs);
     addOperand(operands, operandSegments, numWorkers);
     addOperand(operands, operandSegments, vectorLength);
   }
   addOperand(operands, operandSegments, ifCond);
   addOperand(operands, operandSegments, selfCond);
-  addOperands(operands, operandSegments, reductionOperands);
+  if constexpr (!std::is_same_v<Op, mlir::acc::KernelsOp>)
+    addOperands(operands, operandSegments, reductionOperands);
   addOperands(operands, operandSegments, copyOperands);
   addOperands(operands, operandSegments, copyinOperands);
   addOperands(operands, operandSegments, copyinReadonlyOperands);
@@ -570,11 +571,18 @@ createComputeOp(Fortran::lower::AbstractConverter &converter,
   addOperands(operands, operandSegments, presentOperands);
   addOperands(operands, operandSegments, devicePtrOperands);
   addOperands(operands, operandSegments, attachOperands);
-  addOperands(operands, operandSegments, privateOperands);
-  addOperands(operands, operandSegments, firstprivateOperands);
+  if constexpr (!std::is_same_v<Op, mlir::acc::KernelsOp>) {
+    addOperands(operands, operandSegments, privateOperands);
+    addOperands(operands, operandSegments, firstprivateOperands);
+  }
 
-  Op computeOp = createRegionOp<Op, mlir::acc::YieldOp>(
-      firOpBuilder, currentLocation, operands, operandSegments);
+  Op computeOp;
+  if constexpr (std::is_same_v<Op, mlir::acc::KernelsOp>)
+    computeOp = createRegionOp<Op, mlir::acc::TerminatorOp>(
+        firOpBuilder, currentLocation, operands, operandSegments);
+  else
+    computeOp = createRegionOp<Op, mlir::acc::YieldOp>(
+        firOpBuilder, currentLocation, operands, operandSegments);
 
   if (addAsyncAttr)
     computeOp.setAsyncAttrAttr(firOpBuilder.getUnitAttr());
@@ -697,7 +705,8 @@ genACC(Fortran::lower::AbstractConverter &converter,
     createComputeOp<mlir::acc::SerialOp>(
         converter, currentLocation, semanticsContext, stmtCtx, accClauseList);
   } else if (blockDirective.v == llvm::acc::ACCD_kernels) {
-    TODO(currentLocation, "kernels construct lowering");
+    createComputeOp<mlir::acc::KernelsOp>(
+        converter, currentLocation, semanticsContext, stmtCtx, accClauseList);
   } else if (blockDirective.v == llvm::acc::ACCD_host_data) {
     TODO(currentLocation, "host_data construct lowering");
   }
@@ -720,7 +729,10 @@ genACC(Fortran::lower::AbstractConverter &converter,
   Fortran::lower::StatementContext stmtCtx;
 
   if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) {
-    TODO(currentLocation, "OpenACC Kernels Loop construct not lowered yet!");
+    createComputeOp<mlir::acc::KernelsOp>(
+        converter, currentLocation, semanticsContext, stmtCtx, accClauseList);
+    createLoopOp(converter, currentLocation, semanticsContext, stmtCtx,
+                 accClauseList);
   } else if (combinedDirective.v == llvm::acc::ACCD_parallel_loop) {
     createComputeOp<mlir::acc::ParallelOp>(
         converter, currentLocation, semanticsContext, stmtCtx, accClauseList);

diff  --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
new file mode 100644
index 0000000000000..e8f6b0d704292
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
@@ -0,0 +1,684 @@
+! This test checks lowering of OpenACC kernels loop combined directive.
+
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
+
+subroutine acc_kernels_loop
+  integer :: i, j
+
+  integer :: async = 1
+  integer :: wait1 = 1
+  integer :: wait2 = 2
+  integer :: numGangs = 1
+  integer :: numWorkers = 10
+  integer :: vectorLength = 128
+  logical :: ifCondition = .TRUE.
+  integer, parameter :: n = 10
+  real, dimension(n) :: a, b, c
+  real, dimension(n, n) :: d, e
+  real, pointer :: f, g
+
+  integer :: gangNum = 8
+  integer :: gangStatic = 8
+  integer :: vectorNum = 128
+  integer, parameter :: tileSize = 2
+
+!CHECK: [[A:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Ea"}
+!CHECK: [[B:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Eb"}
+!CHECK: [[C:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Ec"}
+!CHECK: [[F:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "f", uniq_name = "{{.*}}Ef"}
+!CHECK: [[G:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "g", uniq_name = "{{.*}}Eg"}
+!CHECK: [[IFCONDITION:%.*]] = fir.address_of(@{{.*}}ifcondition) : !fir.ref<!fir.logical<4>>
+
+  !$acc kernels loop
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop async
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+  !$acc end kernels loop
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: } attributes {asyncAttr}
+
+  !$acc kernels loop async(1)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[ASYNC1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.kernels async([[ASYNC1]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop async(async)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels async([[ASYNC2]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop wait
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: } attributes {waitAttr}
+
+  !$acc kernels loop wait(1)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[WAIT1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.kernels wait([[WAIT1]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop wait(1, 2)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[WAIT2:%.*]] = arith.constant 1 : i32
+!CHECK:      [[WAIT3:%.*]] = arith.constant 2 : i32
+!CHECK:      acc.kernels wait([[WAIT2]], [[WAIT3]] : i32, i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop wait(wait1, wait2)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels wait([[WAIT4]], [[WAIT5]] : i32, i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop num_gangs(1)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[NUMGANGS1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.kernels num_gangs([[NUMGANGS1]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop num_gangs(numGangs)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels num_gangs([[NUMGANGS2]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop num_workers(10)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
+!CHECK:      acc.kernels num_workers([[NUMWORKERS1]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop num_workers(numWorkers)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels num_workers([[NUMWORKERS2]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop vector_length(128)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
+!CHECK:      acc.kernels vector_length([[VECTORLENGTH1]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop vector_length(vectorLength)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels vector_length([[VECTORLENGTH2]] : i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop if(.TRUE.)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[IF1:%.*]] = arith.constant true
+!CHECK:      acc.kernels if([[IF1]]) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop if(ifCondition)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
+!CHECK:      [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
+!CHECK:      acc.kernels if([[IF2]]) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop self(.TRUE.)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[SELF1:%.*]] = arith.constant true
+!CHECK:      acc.kernels self([[SELF1]]) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop self
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: } attributes {selfAttr}
+
+  !$acc kernels loop self(ifCondition)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[SELF2:%.*]] = fir.convert [[IFCONDITION]] : (!fir.ref<!fir.logical<4>>) -> i1
+!CHECK:      acc.kernels self([[SELF2]]) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop copy(a, b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels copy([[A]], [[B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop copy(a) copy(b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels copy([[A]], [[B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop copyin(a) copyin(readonly: b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels copyin([[A]] : !fir.ref<!fir.array<10xf32>>) copyin_readonly([[B]] : !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop copyout(a) copyout(zero: b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels copyout([[A]] : !fir.ref<!fir.array<10xf32>>) copyout_zero([[B]] : !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop create(b) create(zero: a)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels create([[B]] : !fir.ref<!fir.array<10xf32>>) create_zero([[A]] : !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop no_create(a, b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels no_create([[A]], [[B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop present(a, b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels present([[A]], [[B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop deviceptr(a) deviceptr(b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels deviceptr([[A]], [[B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop attach(f, g)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels attach([[F]], [[G]] : !fir.ref<!fir.box<!fir.ptr<f32>>>, !fir.ref<!fir.box<!fir.ptr<f32>>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop seq
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   } attributes {seq}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop auto
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   } attributes {auto}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop independent
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   } attributes {independent}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop gang
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop gang {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop gang(num: 8)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        [[GANGNUM1:%.*]] = arith.constant 8 : i32
+!CHECK-NEXT:   acc.loop gang(num=[[GANGNUM1]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop gang(num: gangNum)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK-NEXT:   acc.loop gang(num=[[GANGNUM2]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+ !$acc kernels loop gang(num: gangNum, static: gangStatic)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop gang(num=%{{.*}}: i32, static=%{{.*}}: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop vector
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop vector {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop vector(128)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        [[CONSTANT128:%.*]] = arith.constant 128 : i32
+!CHECK:        acc.loop vector([[CONSTANT128]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop vector(vectorLength)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:        acc.loop vector([[VECTORLENGTH]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop worker
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop worker {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop worker(128)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        [[WORKER128:%.*]] = arith.constant 128 : i32
+!CHECK:        acc.loop worker([[WORKER128]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop collapse(2)
+  DO i = 1, n
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:            fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   } attributes {collapse = 2 : i64}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop
+  DO i = 1, n
+    !$acc loop
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:            acc.loop {
+!CHECK:              fir.do_loop
+!CHECK:              acc.yield
+!CHECK-NEXT:     }{{$}}
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+ !$acc kernels loop tile(2)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        [[TILESIZE:%.*]] = arith.constant 2 : i32
+!CHECK:        acc.loop tile([[TILESIZE]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+ !$acc kernels loop tile(*)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        [[TILESIZEM1:%.*]] = arith.constant -1 : i32
+!CHECK:        acc.loop tile([[TILESIZEM1]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop tile(2, 2)
+  DO i = 1, n
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        [[TILESIZE1:%.*]] = arith.constant 2 : i32
+!CHECK:        [[TILESIZE2:%.*]] = arith.constant 2 : i32
+!CHECK:        acc.loop tile([[TILESIZE1]]: i32, [[TILESIZE2]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop tile(tileSize)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop tile(%{{.*}}: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels loop tile(tileSize, tileSize)
+  DO i = 1, n
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.kernels {
+!CHECK:        acc.loop tile(%{{.*}}: i32, %{{.*}}: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+end subroutine

diff  --git a/flang/test/Lower/OpenACC/acc-kernels.f90 b/flang/test/Lower/OpenACC/acc-kernels.f90
new file mode 100644
index 0000000000000..65730fba16817
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-kernels.f90
@@ -0,0 +1,239 @@
+! This test checks lowering of OpenACC kernels construct.
+
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
+
+subroutine acc_kernels
+  integer :: i, j
+
+  integer :: async = 1
+  integer :: wait1 = 1
+  integer :: wait2 = 2
+  integer :: numGangs = 1
+  integer :: numWorkers = 10
+  integer :: vectorLength = 128
+  logical :: ifCondition = .TRUE.
+  real, dimension(10, 10) :: a, b, c
+  real, pointer :: d, e
+
+!CHECK: [[A:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Ea"}
+!CHECK: [[B:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Eb"}
+!CHECK: [[C:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Ec"}
+!CHECK: [[D:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "d", uniq_name = "{{.*}}Ed"}
+!CHECK: [[E:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "e", uniq_name = "{{.*}}Ee"}
+!CHECK: [[IFCONDITION:%.*]] = fir.address_of(@{{.*}}ifcondition) : !fir.ref<!fir.logical<4>>
+
+  !$acc kernels
+  !$acc end kernels
+
+!CHECK:      acc.kernels  {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels async
+  !$acc end kernels
+
+!CHECK:      acc.kernels  {
+!CHECK:        acc.terminator
+!CHECK-NEXT: } attributes {asyncAttr}
+
+  !$acc kernels async(1)
+  !$acc end kernels
+
+!CHECK:      [[ASYNC1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.kernels  async([[ASYNC1]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels async(async)
+  !$acc end kernels
+
+!CHECK:      [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels  async([[ASYNC2]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels wait
+  !$acc end kernels
+
+!CHECK:      acc.kernels  {
+!CHECK:        acc.terminator
+!CHECK-NEXT: } attributes {waitAttr}
+
+  !$acc kernels wait(1)
+  !$acc end kernels
+
+!CHECK:      [[WAIT1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.kernels  wait([[WAIT1]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels wait(1, 2)
+  !$acc end kernels
+
+!CHECK:      [[WAIT2:%.*]] = arith.constant 1 : i32
+!CHECK:      [[WAIT3:%.*]] = arith.constant 2 : i32
+!CHECK:      acc.kernels  wait([[WAIT2]], [[WAIT3]] : i32, i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels wait(wait1, wait2)
+  !$acc end kernels
+
+!CHECK:      [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels  wait([[WAIT4]], [[WAIT5]] : i32, i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels num_gangs(1)
+  !$acc end kernels
+
+!CHECK:      [[NUMGANGS1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.kernels  num_gangs([[NUMGANGS1]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels num_gangs(numGangs)
+  !$acc end kernels
+
+!CHECK:      [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels  num_gangs([[NUMGANGS2]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels num_workers(10)
+  !$acc end kernels
+
+!CHECK:      [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
+!CHECK:      acc.kernels  num_workers([[NUMWORKERS1]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels num_workers(numWorkers)
+  !$acc end kernels
+
+!CHECK:      [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels  num_workers([[NUMWORKERS2]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels vector_length(128)
+  !$acc end kernels
+
+!CHECK:      [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
+!CHECK:      acc.kernels  vector_length([[VECTORLENGTH1]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels vector_length(vectorLength)
+  !$acc end kernels
+
+!CHECK:      [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.kernels  vector_length([[VECTORLENGTH2]] : i32) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels if(.TRUE.)
+  !$acc end kernels
+
+!CHECK:      [[IF1:%.*]] = arith.constant true
+!CHECK:      acc.kernels  if([[IF1]]) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels if(ifCondition)
+  !$acc end kernels
+
+!CHECK:      [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
+!CHECK:      [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
+!CHECK:      acc.kernels  if([[IF2]]) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels self(.TRUE.)
+  !$acc end kernels
+
+!CHECK:      [[SELF1:%.*]] = arith.constant true
+!CHECK:      acc.kernels  self([[SELF1]]) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels self
+  !$acc end kernels
+
+!CHECK:      acc.kernels  {
+!CHECK:        acc.terminator
+!CHECK-NEXT: } attributes {selfAttr}
+
+  !$acc kernels self(ifCondition)
+  !$acc end kernels
+
+!CHECK:      [[SELF2:%.*]] = fir.convert [[IFCONDITION]] : (!fir.ref<!fir.logical<4>>) -> i1
+!CHECK:      acc.kernels  self([[SELF2]]) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels copy(a, b, c)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  copy([[A]], [[B]], [[C]] : !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels copy(a) copy(b) copy(c)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  copy([[A]], [[B]], [[C]] : !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels copyin(a) copyin(readonly: b, c)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  copyin([[A]] : !fir.ref<!fir.array<10x10xf32>>) copyin_readonly([[B]], [[C]] : !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels copyout(a) copyout(zero: b) copyout(c)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  copyout([[A]], [[C]] : !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>) copyout_zero([[B]] : !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels create(a, b) create(zero: c)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  create([[A]], [[B]] : !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>) create_zero([[C]] : !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels no_create(a, b) create(zero: c)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  create_zero([[C]] : !fir.ref<!fir.array<10x10xf32>>) no_create([[A]], [[B]] : !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels present(a, b, c)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  present([[A]], [[B]], [[C]] : !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels deviceptr(a) deviceptr(c)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  deviceptr([[A]], [[C]] : !fir.ref<!fir.array<10x10xf32>>, !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+  !$acc kernels attach(d, e)
+  !$acc end kernels
+
+!CHECK:      acc.kernels  attach([[D]], [[E]] : !fir.ref<!fir.box<!fir.ptr<f32>>>, !fir.ref<!fir.box<!fir.ptr<f32>>>) {
+!CHECK:        acc.terminator
+!CHECK-NEXT: }{{$}}
+
+end subroutine acc_kernels


        


More information about the flang-commits mailing list