[flang-commits] [flang] d7ab38f - [flang][OpenACC] Lower parallel loop

Valentin Clement via flang-commits flang-commits at lists.llvm.org
Fri Jun 24 12:06:27 PDT 2022


Author: Valentin Clement
Date: 2022-06-24T21:06:21+02:00
New Revision: d7ab38f8f4d4a21cd09f62aed897cc02d174d57b

URL: https://github.com/llvm/llvm-project/commit/d7ab38f8f4d4a21cd09f62aed897cc02d174d57b
DIFF: https://github.com/llvm/llvm-project/commit/d7ab38f8f4d4a21cd09f62aed897cc02d174d57b.diff

LOG: [flang][OpenACC] Lower parallel loop

Lower the `parallel loop` construct and refactor some of the code
of parallel and loop lowering to be reused.

Also add tests for loop and parallel since they were not upstreamed.

This patch is part of the upstreaming effort from fir-dev branch.

Reviewed By: PeteSteinfeld

Differential Revision: https://reviews.llvm.org/D128510

Added: 
    flang/test/Lower/OpenACC/acc-loop.f90
    flang/test/Lower/OpenACC/acc-parallel-loop.f90
    flang/test/Lower/OpenACC/acc-parallel.f90

Modified: 
    flang/lib/Lower/OpenACC.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 260db752d3e6..6172cac852a9 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -108,7 +108,7 @@ createRegionOp(fir::FirOpBuilder &builder, mlir::Location loc,
   llvm::ArrayRef<mlir::Type> argTy;
   Op op = builder.create<Op>(loc, argTy, operands);
   builder.createBlock(&op.getRegion());
-  auto &block = op.getRegion().back();
+  mlir::Block &block = op.getRegion().back();
   builder.setInsertionPointToStart(&block);
   builder.create<Terminator>(loc);
 
@@ -204,161 +204,166 @@ static void genWaitClause(Fortran::lower::AbstractConverter &converter,
   }
 }
 
-static void genACC(Fortran::lower::AbstractConverter &converter,
-                   Fortran::lower::pft::Evaluation &eval,
-                   const Fortran::parser::OpenACCLoopConstruct &loopConstruct) {
+static mlir::acc::LoopOp
+createLoopOp(Fortran::lower::AbstractConverter &converter,
+             const Fortran::parser::AccClauseList &accClauseList) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  mlir::Location currentLocation = converter.getCurrentLocation();
   Fortran::lower::StatementContext stmtCtx;
-  const auto &beginLoopDirective =
-      std::get<Fortran::parser::AccBeginLoopDirective>(loopConstruct.t);
-  const auto &loopDirective =
-      std::get<Fortran::parser::AccLoopDirective>(beginLoopDirective.t);
 
-  if (loopDirective.v == llvm::acc::ACCD_loop) {
-    auto &firOpBuilder = converter.getFirOpBuilder();
-    auto currentLocation = converter.getCurrentLocation();
-
-    // Add attribute extracted from clauses.
-    const auto &accClauseList =
-        std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
+  mlir::Value workerNum;
+  mlir::Value vectorNum;
+  mlir::Value gangNum;
+  mlir::Value gangStatic;
+  llvm::SmallVector<mlir::Value, 2> tileOperands, privateOperands,
+      reductionOperands;
+  std::int64_t executionMapping = mlir::acc::OpenACCExecMapping::NONE;
 
-    mlir::Value workerNum;
-    mlir::Value vectorLength;
-    mlir::Value gangNum;
-    mlir::Value gangStatic;
-    llvm::SmallVector<mlir::Value, 2> tileOperands, privateOperands,
-        reductionOperands;
-    std::int64_t executionMapping = mlir::acc::OpenACCExecMapping::NONE;
-
-    // Lower clauses values mapped to operands.
-    for (const auto &clause : accClauseList.v) {
-      if (const auto *gangClause =
-              std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
-        if (gangClause->v) {
-          const Fortran::parser::AccGangArgument &x = *gangClause->v;
-          if (const auto &gangNumValue =
-                  std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
-                      x.t)) {
-            gangNum = fir::getBase(converter.genExprValue(
-                *Fortran::semantics::GetExpr(gangNumValue.value()), stmtCtx));
-          }
-          if (const auto &gangStaticValue =
-                  std::get<std::optional<Fortran::parser::AccSizeExpr>>(x.t)) {
-            const auto &expr =
-                std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
-                    gangStaticValue.value().t);
-            if (expr) {
-              gangStatic = fir::getBase(converter.genExprValue(
-                  *Fortran::semantics::GetExpr(*expr), stmtCtx));
-            } else {
-              // * was passed as value and will be represented as a -1 constant
-              // integer.
-              gangStatic = firOpBuilder.createIntegerConstant(
-                  currentLocation, firOpBuilder.getIntegerType(32),
-                  /* STAR */ -1);
-            }
-          }
-        }
-        executionMapping |= mlir::acc::OpenACCExecMapping::GANG;
-      } else if (const auto *workerClause =
-                     std::get_if<Fortran::parser::AccClause::Worker>(
-                         &clause.u)) {
-        if (workerClause->v) {
-          workerNum = fir::getBase(converter.genExprValue(
-              *Fortran::semantics::GetExpr(*workerClause->v), stmtCtx));
-        }
-        executionMapping |= mlir::acc::OpenACCExecMapping::WORKER;
-      } else if (const auto *vectorClause =
-                     std::get_if<Fortran::parser::AccClause::Vector>(
-                         &clause.u)) {
-        if (vectorClause->v) {
-          vectorLength = fir::getBase(converter.genExprValue(
-              *Fortran::semantics::GetExpr(*vectorClause->v), stmtCtx));
+  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
+    if (const auto *gangClause =
+            std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
+      if (gangClause->v) {
+        const Fortran::parser::AccGangArgument &x = *gangClause->v;
+        if (const auto &gangNumValue =
+                std::get<std::optional<Fortran::parser::ScalarIntExpr>>(x.t)) {
+          gangNum = fir::getBase(converter.genExprValue(
+              *Fortran::semantics::GetExpr(gangNumValue.value()), stmtCtx));
         }
-        executionMapping |= mlir::acc::OpenACCExecMapping::VECTOR;
-      } else if (const auto *tileClause =
-                     std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) {
-        const Fortran::parser::AccTileExprList &accTileExprList = tileClause->v;
-        for (const auto &accTileExpr : accTileExprList.v) {
+        if (const auto &gangStaticValue =
+                std::get<std::optional<Fortran::parser::AccSizeExpr>>(x.t)) {
           const auto &expr =
-              std::get<std::optional<Fortran::parser::ScalarIntConstantExpr>>(
-                  accTileExpr.t);
+              std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
+                  gangStaticValue.value().t);
           if (expr) {
-            tileOperands.push_back(fir::getBase(converter.genExprValue(
-                *Fortran::semantics::GetExpr(*expr), stmtCtx)));
+            gangStatic = fir::getBase(converter.genExprValue(
+                *Fortran::semantics::GetExpr(*expr), stmtCtx));
           } else {
-            // * was passed as value and will be represented as a -1 constant
-            // integer.
-            mlir::Value tileStar = firOpBuilder.createIntegerConstant(
-                currentLocation, firOpBuilder.getIntegerType(32),
-                /* STAR */ -1);
-            tileOperands.push_back(tileStar);
+            // * was passed as value and will be represented as a special
+            // constant.
+            gangStatic = firOpBuilder.createIntegerConstant(
+                currentLocation, firOpBuilder.getIndexType(), starCst);
           }
         }
-      } else if (const auto *privateClause =
-                     std::get_if<Fortran::parser::AccClause::Private>(
-                         &clause.u)) {
-        genObjectList(privateClause->v, converter, privateOperands);
       }
-      // Reduction clause is left out for the moment as the clause will probably
-      // end up having its own operation.
+      executionMapping |= mlir::acc::OpenACCExecMapping::GANG;
+    } else if (const auto *workerClause =
+                   std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) {
+      if (workerClause->v) {
+        workerNum = fir::getBase(converter.genExprValue(
+            *Fortran::semantics::GetExpr(*workerClause->v), stmtCtx));
+      }
+      executionMapping |= mlir::acc::OpenACCExecMapping::WORKER;
+    } else if (const auto *vectorClause =
+                   std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) {
+      if (vectorClause->v) {
+        vectorNum = fir::getBase(converter.genExprValue(
+            *Fortran::semantics::GetExpr(*vectorClause->v), stmtCtx));
+      }
+      executionMapping |= mlir::acc::OpenACCExecMapping::VECTOR;
+    } else if (const auto *tileClause =
+                   std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) {
+      const Fortran::parser::AccTileExprList &accTileExprList = tileClause->v;
+      for (const auto &accTileExpr : accTileExprList.v) {
+        const auto &expr =
+            std::get<std::optional<Fortran::parser::ScalarIntConstantExpr>>(
+                accTileExpr.t);
+        if (expr) {
+          tileOperands.push_back(fir::getBase(converter.genExprValue(
+              *Fortran::semantics::GetExpr(*expr), stmtCtx)));
+        } else {
+          // * was passed as value and will be represented as a -1 constant
+          // integer.
+          mlir::Value tileStar = firOpBuilder.createIntegerConstant(
+              currentLocation, firOpBuilder.getIntegerType(32),
+              /* STAR */ -1);
+          tileOperands.push_back(tileStar);
+        }
+      }
+    } else if (const auto *privateClause =
+                   std::get_if<Fortran::parser::AccClause::Private>(
+                       &clause.u)) {
+      genObjectList(privateClause->v, converter, privateOperands);
     }
+    // Reduction clause is left out for the moment as the clause will probably
+    // end up having its own operation.
+  }
 
-    // Prepare the operand segement size attribute and the operands value range.
-    llvm::SmallVector<mlir::Value, 8> operands;
-    llvm::SmallVector<int32_t, 8> operandSegments;
-    addOperand(operands, operandSegments, gangNum);
-    addOperand(operands, operandSegments, gangStatic);
-    addOperand(operands, operandSegments, workerNum);
-    addOperand(operands, operandSegments, vectorLength);
-    addOperands(operands, operandSegments, tileOperands);
-    addOperands(operands, operandSegments, privateOperands);
-    addOperands(operands, operandSegments, reductionOperands);
-
-    auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
-        firOpBuilder, currentLocation, operands, operandSegments);
-
-    loopOp->setAttr(mlir::acc::LoopOp::getExecutionMappingAttrName(),
-                    firOpBuilder.getI64IntegerAttr(executionMapping));
-
-    // Lower clauses mapped to attributes
-    for (const auto &clause : accClauseList.v) {
-      if (const auto *collapseClause =
-              std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
-        const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
-        const auto collapseValue = Fortran::evaluate::ToInt64(*expr);
-        if (collapseValue) {
-          loopOp->setAttr(mlir::acc::LoopOp::getCollapseAttrName(),
-                          firOpBuilder.getI64IntegerAttr(*collapseValue));
-        }
-      } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
-        loopOp->setAttr(mlir::acc::LoopOp::getSeqAttrName(),
-                        firOpBuilder.getUnitAttr());
-      } else if (std::get_if<Fortran::parser::AccClause::Independent>(
-                     &clause.u)) {
-        loopOp->setAttr(mlir::acc::LoopOp::getIndependentAttrName(),
-                        firOpBuilder.getUnitAttr());
-      } else if (std::get_if<Fortran::parser::AccClause::Auto>(&clause.u)) {
-        loopOp->setAttr(mlir::acc::LoopOp::getAutoAttrName(),
-                        firOpBuilder.getUnitAttr());
+  // Prepare the operand segement size attribute and the operands value range.
+  llvm::SmallVector<mlir::Value> operands;
+  llvm::SmallVector<int32_t> operandSegments;
+  addOperand(operands, operandSegments, gangNum);
+  addOperand(operands, operandSegments, gangStatic);
+  addOperand(operands, operandSegments, workerNum);
+  addOperand(operands, operandSegments, vectorNum);
+  addOperands(operands, operandSegments, tileOperands);
+  addOperands(operands, operandSegments, privateOperands);
+  addOperands(operands, operandSegments, reductionOperands);
+
+  auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
+      firOpBuilder, currentLocation, operands, operandSegments);
+
+  loopOp->setAttr(mlir::acc::LoopOp::getExecutionMappingAttrName(),
+                  firOpBuilder.getI64IntegerAttr(executionMapping));
+
+  // Lower clauses mapped to attributes
+  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
+    if (const auto *collapseClause =
+            std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
+      const auto *expr = Fortran::semantics::GetExpr(collapseClause->v);
+      const std::optional<int64_t> collapseValue =
+          Fortran::evaluate::ToInt64(*expr);
+      if (collapseValue) {
+        loopOp->setAttr(mlir::acc::LoopOp::getCollapseAttrName(),
+                        firOpBuilder.getI64IntegerAttr(*collapseValue));
       }
+    } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
+      loopOp->setAttr(mlir::acc::LoopOp::getSeqAttrName(),
+                      firOpBuilder.getUnitAttr());
+    } else if (std::get_if<Fortran::parser::AccClause::Independent>(
+                   &clause.u)) {
+      loopOp->setAttr(mlir::acc::LoopOp::getIndependentAttrName(),
+                      firOpBuilder.getUnitAttr());
+    } else if (std::get_if<Fortran::parser::AccClause::Auto>(&clause.u)) {
+      loopOp->setAttr(mlir::acc::LoopOp::getAutoAttrName(),
+                      firOpBuilder.getUnitAttr());
     }
   }
+  return loopOp;
 }
 
-static void
-genACCParallelOp(Fortran::lower::AbstractConverter &converter,
+static void genACC(Fortran::lower::AbstractConverter &converter,
+                   Fortran::lower::pft::Evaluation &eval,
+                   const Fortran::parser::OpenACCLoopConstruct &loopConstruct) {
+
+  const auto &beginLoopDirective =
+      std::get<Fortran::parser::AccBeginLoopDirective>(loopConstruct.t);
+  const auto &loopDirective =
+      std::get<Fortran::parser::AccLoopDirective>(beginLoopDirective.t);
+
+  if (loopDirective.v == llvm::acc::ACCD_loop) {
+    const auto &accClauseList =
+        std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
+    createLoopOp(converter, accClauseList);
+  }
+}
+
+static mlir::acc::ParallelOp
+createParallelOp(Fortran::lower::AbstractConverter &converter,
                  const Fortran::parser::AccClauseList &accClauseList) {
+
+  // Parallel operation operands
   mlir::Value async;
   mlir::Value numGangs;
   mlir::Value numWorkers;
   mlir::Value vectorLength;
   mlir::Value ifCond;
   mlir::Value selfCond;
+  mlir::Value waitDevnum;
   llvm::SmallVector<mlir::Value, 2> waitOperands, reductionOperands,
       copyOperands, copyinOperands, copyinReadonlyOperands, copyoutOperands,
       copyoutZeroOperands, createOperands, createZeroOperands, noCreateOperands,
-      presentOperands, devicePtrOperands, attachOperands, privateOperands,
-      firstprivateOperands;
+      presentOperands, devicePtrOperands, attachOperands, firstprivateOperands,
+      privateOperands;
 
   // Async, wait and self clause have optional values but can be present with
   // no value as well. When there is no value, the op has an attribute to
@@ -367,38 +372,21 @@ genACCParallelOp(Fortran::lower::AbstractConverter &converter,
   bool addWaitAttr = false;
   bool addSelfAttr = false;
 
-  auto &firOpBuilder = converter.getFirOpBuilder();
-  auto currentLocation = converter.getCurrentLocation();
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  mlir::Location currentLocation = converter.getCurrentLocation();
   Fortran::lower::StatementContext stmtCtx;
 
   // Lower clauses values mapped to operands.
   // Keep track of each group of operands separatly as clauses can appear
   // more than once.
-  for (const auto &clause : accClauseList.v) {
+  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
     if (const auto *asyncClause =
             std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
-      const auto &asyncClauseValue = asyncClause->v;
-      if (asyncClauseValue) { // async has a value.
-        async = fir::getBase(converter.genExprValue(
-            *Fortran::semantics::GetExpr(*asyncClauseValue), stmtCtx));
-      } else {
-        addAsyncAttr = true;
-      }
+      genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx);
     } else if (const auto *waitClause =
                    std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) {
-      const auto &waitClauseValue = waitClause->v;
-      if (waitClauseValue) { // wait has a value.
-        const Fortran::parser::AccWaitArgument &waitArg = *waitClauseValue;
-        const auto &waitList =
-            std::get<std::list<Fortran::parser::ScalarIntExpr>>(waitArg.t);
-        for (const Fortran::parser::ScalarIntExpr &value : waitList) {
-          auto v = fir::getBase(converter.genExprValue(
-              *Fortran::semantics::GetExpr(value), stmtCtx));
-          waitOperands.push_back(v);
-        }
-      } else {
-        addWaitAttr = true;
-      }
+      genWaitClause(converter, waitClause, waitOperands, waitDevnum,
+                    addWaitAttr, stmtCtx);
     } else if (const auto *numGangsClause =
                    std::get_if<Fortran::parser::AccClause::NumGangs>(
                        &clause.u)) {
@@ -416,10 +404,7 @@ genACCParallelOp(Fortran::lower::AbstractConverter &converter,
           *Fortran::semantics::GetExpr(vectorLengthClause->v), stmtCtx));
     } else if (const auto *ifClause =
                    std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
-      mlir::Value cond = fir::getBase(converter.genExprValue(
-          *Fortran::semantics::GetExpr(ifClause->v), stmtCtx));
-      ifCond = firOpBuilder.createConvert(currentLocation,
-                                          firOpBuilder.getI1Type(), cond);
+      genIfClause(converter, ifClause, ifCond, stmtCtx);
     } else if (const auto *selfClause =
                    std::get_if<Fortran::parser::AccClause::Self>(&clause.u)) {
       const Fortran::parser::AccSelfClause &accSelfClause = selfClause->v;
@@ -434,6 +419,21 @@ genACCParallelOp(Fortran::lower::AbstractConverter &converter,
         } else {
           addSelfAttr = true;
         }
+      } else if (const auto *accClauseList =
+                     std::get_if<Fortran::parser::AccObjectList>(
+                         &accSelfClause.u)) {
+        // TODO This would be nicer to be done in canonicalization step.
+        if (accClauseList->v.size() == 1) {
+          const auto &accObject = accClauseList->v.front();
+          if (const auto *designator =
+                  std::get_if<Fortran::parser::Designator>(&accObject.u)) {
+            if (const auto *name = getDesignatorNameIfDataRef(*designator)) {
+              auto cond = converter.getSymbolAddress(*name->symbol);
+              selfCond = firOpBuilder.createConvert(
+                  currentLocation, firOpBuilder.getI1Type(), cond);
+            }
+          }
+        }
       }
     } else if (const auto *copyClause =
                    std::get_if<Fortran::parser::AccClause::Copy>(&clause.u)) {
@@ -508,8 +508,9 @@ genACCParallelOp(Fortran::lower::AbstractConverter &converter,
   addOperands(operands, operandSegments, privateOperands);
   addOperands(operands, operandSegments, firstprivateOperands);
 
-  auto parallelOp = createRegionOp<mlir::acc::ParallelOp, mlir::acc::YieldOp>(
-      firOpBuilder, currentLocation, operands, operandSegments);
+  mlir::acc::ParallelOp parallelOp =
+      createRegionOp<mlir::acc::ParallelOp, mlir::acc::YieldOp>(
+          firOpBuilder, currentLocation, operands, operandSegments);
 
   if (addAsyncAttr)
     parallelOp->setAttr(mlir::acc::ParallelOp::getAsyncAttrName(),
@@ -520,6 +521,14 @@ genACCParallelOp(Fortran::lower::AbstractConverter &converter,
   if (addSelfAttr)
     parallelOp->setAttr(mlir::acc::ParallelOp::getSelfAttrName(),
                         firOpBuilder.getUnitAttr());
+
+  return parallelOp;
+}
+
+static void
+genACCParallelOp(Fortran::lower::AbstractConverter &converter,
+                 const Fortran::parser::AccClauseList &accClauseList) {
+  createParallelOp(converter, accClauseList);
 }
 
 static void genACCDataOp(Fortran::lower::AbstractConverter &converter,
@@ -619,6 +628,37 @@ genACC(Fortran::lower::AbstractConverter &converter,
   }
 }
 
+static void
+genACCParallelLoopOps(Fortran::lower::AbstractConverter &converter,
+                      const Fortran::parser::AccClauseList &accClauseList) {
+  createParallelOp(converter, accClauseList);
+  createLoopOp(converter, accClauseList);
+}
+
+static void
+genACC(Fortran::lower::AbstractConverter &converter,
+       Fortran::lower::pft::Evaluation &eval,
+       const Fortran::parser::OpenACCCombinedConstruct &combinedConstruct) {
+  const auto &beginCombinedDirective =
+      std::get<Fortran::parser::AccBeginCombinedDirective>(combinedConstruct.t);
+  const auto &combinedDirective =
+      std::get<Fortran::parser::AccCombinedDirective>(beginCombinedDirective.t);
+  const auto &accClauseList =
+      std::get<Fortran::parser::AccClauseList>(beginCombinedDirective.t);
+
+  if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) {
+    TODO(converter.getCurrentLocation(),
+         "OpenACC Kernels Loop construct not lowered yet!");
+  } else if (combinedDirective.v == llvm::acc::ACCD_parallel_loop) {
+    genACCParallelLoopOps(converter, accClauseList);
+  } else if (combinedDirective.v == llvm::acc::ACCD_serial_loop) {
+    TODO(converter.getCurrentLocation(),
+         "OpenACC Serial Loop construct not lowered yet!");
+  } else {
+    llvm::report_fatal_error("Unknown combined construct encountered");
+  }
+}
+
 static void
 genACCEnterDataOp(Fortran::lower::AbstractConverter &converter,
                   const Fortran::parser::AccClauseList &accClauseList) {
@@ -979,8 +1019,7 @@ void Fortran::lower::genOpenACCConstruct(
           },
           [&](const Fortran::parser::OpenACCCombinedConstruct
                   &combinedConstruct) {
-            TODO(converter.getCurrentLocation(),
-                 "OpenACC Combined construct not lowered yet!");
+            genACC(converter, eval, combinedConstruct);
           },
           [&](const Fortran::parser::OpenACCLoopConstruct &loopConstruct) {
             genACC(converter, eval, loopConstruct);

diff  --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
new file mode 100644
index 000000000000..ac9be539cd6d
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -0,0 +1,268 @@
+! This test checks lowering of OpenACC loop directive.
+
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
+
+program acc_loop
+
+  integer :: i, j
+  integer, parameter :: n = 10
+  real, dimension(n) :: a, b
+  real, dimension(n, n) :: c, d
+  integer :: gangNum = 8
+  integer :: gangStatic = 8
+  integer :: vectorLength = 128
+  integer, parameter :: tileSize = 2
+
+
+  !$acc loop
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc loop seq
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {seq}
+
+  !$acc loop auto
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {auto}
+
+  !$acc loop independent
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {independent}
+
+  !$acc loop gang
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop gang {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop gang(num: 8)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[GANGNUM1:%.*]] = arith.constant 8 : i32
+!CHECK-NEXT: acc.loop gang(num=[[GANGNUM1]]: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop gang(num: gangNum)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK-NEXT: acc.loop gang(num=[[GANGNUM2]]: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc loop gang(num: gangNum, static: gangStatic)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK: acc.loop gang(num=%{{.*}}: i32, static=%{{.*}}: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop vector
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop vector {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop vector(128)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK: [[CONSTANT128:%.*]] = arith.constant 128 : i32
+!CHECK:      acc.loop vector([[CONSTANT128]]: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop vector(vectorLength)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.loop vector([[VECTORLENGTH]]: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+!$acc loop worker
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop worker {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop worker(128)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK: [[WORKER128:%.*]] = arith.constant 128 : i32
+!CHECK:      acc.loop worker([[WORKER128]]: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop private(c)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop private(%{{.*}}: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop private(c, d)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop private(%{{.*}}: !fir.ref<!fir.array<10x10xf32>>, %{{.*}}: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop private(c) private(d)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop private(%{{.*}}: !fir.ref<!fir.array<10x10xf32>>, %{{.*}}: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop tile(2)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+!CHECK:      [[TILESIZE:%.*]] = arith.constant 2 : i32
+!CHECK:      acc.loop tile([[TILESIZE]]: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc loop tile(*)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+!CHECK:      [[TILESIZEM1:%.*]] = arith.constant -1 : i32
+!CHECK:      acc.loop tile([[TILESIZEM1]]: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop tile(2, 2)
+  DO i = 1, n
+    DO j = 1, n
+      c(i, j) = d(i, j)
+    END DO
+  END DO
+
+!CHECK:      [[TILESIZE1:%.*]] = arith.constant 2 : i32
+!CHECK:      [[TILESIZE2:%.*]] = arith.constant 2 : i32
+!CHECK:      acc.loop tile([[TILESIZE1]]: i32, [[TILESIZE2]]: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop tile(tileSize)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.loop tile(%{{.*}}: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop tile(tileSize, tileSize)
+  DO i = 1, n
+    DO j = 1, n
+      c(i, j) = d(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.loop tile(%{{.*}}: i32, %{{.*}}: i32) {
+!CHECK:        fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc loop collapse(2)
+  DO i = 1, n
+    DO j = 1, n
+      c(i, j) = d(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.loop {
+!CHECK:        fir.do_loop
+!CHECK:          fir.do_loop
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {collapse = 2 : i64}
+
+  !$acc loop
+  DO i = 1, n
+    !$acc loop
+    DO j = 1, n
+      c(i, j) = d(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.loop {
+!CHECK:        fir.do_loop
+!CHECK:          acc.loop {
+!CHECK:            fir.do_loop
+!CHECK:            acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+end program

diff  --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
new file mode 100644
index 000000000000..1a04b84a689f
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
@@ -0,0 +1,697 @@
+! This test checks lowering of the OpenACC `parallel loop` combined directive.
+
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
+
+subroutine acc_parallel_loop ! One combined "parallel loop" directive per clause, each followed by its expected output.
+  integer :: i, j ! loop indices
+  ! Variables used as run-time clause operands below.
+  integer :: async = 1
+  integer :: wait1 = 1
+  integer :: wait2 = 2
+  integer :: numGangs = 1
+  integer :: numWorkers = 10
+  integer :: vectorLength = 128
+  logical :: ifCondition = .TRUE.
+  integer, parameter :: n = 10 ! loop bound and array extent
+  real, dimension(n) :: a, b, c
+  real, dimension(n, n) :: d, e
+  real, pointer :: f, g ! operands of the attach clause
+
+  integer :: gangNum = 8
+  integer :: gangStatic = 8
+  integer :: vectorNum = 128 ! NOTE(review): never referenced below; vector(...) uses vectorLength
+  integer, parameter :: tileSize = 2
+  ! Capture the values produced for a, b, c, f, g and ifCondition so later clause checks can refer to them.
+!CHECK: [[A:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Ea"}
+!CHECK: [[B:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Eb"}
+!CHECK: [[C:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Ec"}
+!CHECK: [[F:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "f", uniq_name = "{{.*}}Ef"}
+!CHECK: [[G:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "g", uniq_name = "{{.*}}Eg"}
+!CHECK: [[IFCONDITION:%.*]] = fir.address_of(@{{.*}}ifcondition) : !fir.ref<!fir.logical<4>>
+  ! No clause: an acc.loop nested in an acc.parallel, each ended by acc.yield.
+  !$acc parallel loop
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+  ! async through firstprivate: the clause is expected on acc.parallel (private is also expected on acc.loop).
+  !$acc parallel loop async
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+  !$acc end parallel loop
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {asyncAttr}
+
+  !$acc parallel loop async(1)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[ASYNC1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.parallel async([[ASYNC1]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop async(async)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel async([[ASYNC2]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop wait
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {waitAttr}
+
+  !$acc parallel loop wait(1)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[WAIT1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.parallel wait([[WAIT1]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop wait(1, 2)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[WAIT2:%.*]] = arith.constant 1 : i32
+!CHECK:      [[WAIT3:%.*]] = arith.constant 2 : i32
+!CHECK:      acc.parallel wait([[WAIT2]]: i32, [[WAIT3]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop wait(wait1, wait2)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel wait([[WAIT4]]: i32, [[WAIT5]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop num_gangs(1)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[NUMGANGS1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.parallel num_gangs([[NUMGANGS1]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop num_gangs(numGangs)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel num_gangs([[NUMGANGS2]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop num_workers(10)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
+!CHECK:      acc.parallel num_workers([[NUMWORKERS1]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop num_workers(numWorkers)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel num_workers([[NUMWORKERS2]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop vector_length(128)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
+!CHECK:      acc.parallel vector_length([[VECTORLENGTH1]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop vector_length(vectorLength)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel vector_length([[VECTORLENGTH2]]: i32) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop if(.TRUE.)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[IF1:%.*]] = arith.constant true
+!CHECK:      acc.parallel if([[IF1]]) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop if(ifCondition)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
+!CHECK:      [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
+!CHECK:      acc.parallel if([[IF2]]) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop self(.TRUE.)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[SELF1:%.*]] = arith.constant true
+!CHECK:      acc.parallel self([[SELF1]]) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop self
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {selfAttr}
+  ! NOTE(review): unlike if(ifCondition), no fir.load is checked; fir.convert takes the !fir.ref - confirm.
+  !$acc parallel loop self(ifCondition)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      [[SELF2:%.*]] = fir.convert [[IFCONDITION]] : (!fir.ref<!fir.logical<4>>) -> i1
+!CHECK:      acc.parallel self([[SELF2]]) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop copy(a, b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel copy([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop copy(a) copy(b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel copy([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop copyin(a) copyin(readonly: b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel copyin([[A]]: !fir.ref<!fir.array<10xf32>>) copyin_readonly([[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop copyout(a) copyout(zero: b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel copyout([[A]]: !fir.ref<!fir.array<10xf32>>) copyout_zero([[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop create(b) create(zero: a)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel create([[B]]: !fir.ref<!fir.array<10xf32>>) create_zero([[A]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop no_create(a, b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel no_create([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop present(a, b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel present([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop deviceptr(a) deviceptr(b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel deviceptr([[A]]: !fir.ref<!fir.array<10xf32>>, [[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop attach(f, g)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel attach([[F]]: !fir.ref<!fir.box<!fir.ptr<f32>>>, [[G]]: !fir.ref<!fir.box<!fir.ptr<f32>>>) {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop private(a) firstprivate(b)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel private([[A]]: !fir.ref<!fir.array<10xf32>>) firstprivate([[B]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:        acc.loop private([[A]]: !fir.ref<!fir.array<10xf32>>) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+  ! From here on the clauses are expected on the nested acc.loop; acc.parallel carries none.
+  !$acc parallel loop seq
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   } attributes {seq}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop auto
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   } attributes {auto}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop independent
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   } attributes {independent}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop gang
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop gang {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+  ! Loop-clause operands are checked after the acc.parallel line and before the acc.loop that uses them.
+  !$acc parallel loop gang(num: 8)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        [[GANGNUM1:%.*]] = arith.constant 8 : i32
+!CHECK-NEXT:   acc.loop gang(num=[[GANGNUM1]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop gang(num: gangNum)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK-NEXT:   acc.loop gang(num=[[GANGNUM2]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop gang(num: gangNum, static: gangStatic)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop gang(num=%{{.*}}: i32, static=%{{.*}}: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop vector
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop vector {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop vector(128)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        [[CONSTANT128:%.*]] = arith.constant 128 : i32
+!CHECK:        acc.loop vector([[CONSTANT128]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop vector(vectorLength)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:        acc.loop vector([[VECTORLENGTH]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop worker
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop worker {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop worker(128)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        [[WORKER128:%.*]] = arith.constant 128 : i32
+!CHECK:        acc.loop worker([[WORKER128]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop collapse(2)
+  DO i = 1, n
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:            fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   } attributes {collapse = 2 : i64}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop
+  DO i = 1, n
+    !$acc loop
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop {
+!CHECK:          fir.do_loop
+!CHECK:            acc.loop {
+!CHECK:              fir.do_loop
+!CHECK:              acc.yield
+!CHECK-NEXT:     }{{$}}
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop tile(2)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        [[TILESIZE:%.*]] = arith.constant 2 : i32
+!CHECK:        acc.loop tile([[TILESIZE]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+  ! tile(*) is expected to produce a tile operand holding the constant -1.
+ !$acc parallel loop tile(*)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        [[TILESIZEM1:%.*]] = arith.constant -1 : i32
+!CHECK:        acc.loop tile([[TILESIZEM1]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop tile(2, 2)
+  DO i = 1, n
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        [[TILESIZE1:%.*]] = arith.constant 2 : i32
+!CHECK:        [[TILESIZE2:%.*]] = arith.constant 2 : i32
+!CHECK:        acc.loop tile([[TILESIZE1]]: i32, [[TILESIZE2]]: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop tile(tileSize)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop tile(%{{.*}}: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop tile(tileSize, tileSize)
+  DO i = 1, n
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.loop tile(%{{.*}}: i32, %{{.*}}: i32) {
+!CHECK:          fir.do_loop
+!CHECK:          acc.yield
+!CHECK-NEXT:   }{{$}}
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+end subroutine acc_parallel_loop

diff  --git a/flang/test/Lower/OpenACC/acc-parallel.f90 b/flang/test/Lower/OpenACC/acc-parallel.f90
new file mode 100644
index 000000000000..bc596581a7e5
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-parallel.f90
@@ -0,0 +1,246 @@
+! This test checks lowering of the OpenACC `parallel` directive.
+
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
+
+subroutine acc_parallel ! One empty "parallel" construct per clause, each followed by its expected acc.parallel output.
+  integer :: i, j ! NOTE(review): i and j are not referenced anywhere in this subroutine
+  ! Variables used as run-time clause operands below.
+  integer :: async = 1
+  integer :: wait1 = 1
+  integer :: wait2 = 2
+  integer :: numGangs = 1
+  integer :: numWorkers = 10
+  integer :: vectorLength = 128
+  logical :: ifCondition = .TRUE.
+  real, dimension(10, 10) :: a, b, c
+  real, pointer :: d, e ! operands of the attach clause
+  ! Capture the values produced for a, b, c, d, e and ifCondition so later clause checks can refer to them.
+!CHECK: [[A:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Ea"}
+!CHECK: [[B:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Eb"}
+!CHECK: [[C:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Ec"}
+!CHECK: [[D:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "d", uniq_name = "{{.*}}Ed"}
+!CHECK: [[E:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "e", uniq_name = "{{.*}}Ee"}
+!CHECK: [[IFCONDITION:%.*]] = fir.address_of(@{{.*}}ifcondition) : !fir.ref<!fir.logical<4>>
+  ! No clause: an acc.parallel ended by acc.yield, with no attributes after the closing brace.
+  !$acc parallel
+  !$acc end parallel
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel async
+  !$acc end parallel
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {asyncAttr}
+
+  !$acc parallel async(1)
+  !$acc end parallel
+
+!CHECK:      [[ASYNC1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.parallel async([[ASYNC1]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel async(async)
+  !$acc end parallel
+
+!CHECK:      [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel async([[ASYNC2]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel wait
+  !$acc end parallel
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {waitAttr}
+
+  !$acc parallel wait(1)
+  !$acc end parallel
+
+!CHECK:      [[WAIT1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.parallel wait([[WAIT1]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel wait(1, 2)
+  !$acc end parallel
+
+!CHECK:      [[WAIT2:%.*]] = arith.constant 1 : i32
+!CHECK:      [[WAIT3:%.*]] = arith.constant 2 : i32
+!CHECK:      acc.parallel wait([[WAIT2]]: i32, [[WAIT3]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel wait(wait1, wait2)
+  !$acc end parallel
+
+!CHECK:      [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel wait([[WAIT4]]: i32, [[WAIT5]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel num_gangs(1)
+  !$acc end parallel
+
+!CHECK:      [[NUMGANGS1:%.*]] = arith.constant 1 : i32
+!CHECK:      acc.parallel num_gangs([[NUMGANGS1]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel num_gangs(numGangs)
+  !$acc end parallel
+
+!CHECK:      [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel num_gangs([[NUMGANGS2]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel num_workers(10)
+  !$acc end parallel
+
+!CHECK:      [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
+!CHECK:      acc.parallel num_workers([[NUMWORKERS1]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel num_workers(numWorkers)
+  !$acc end parallel
+
+!CHECK:      [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel num_workers([[NUMWORKERS2]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel vector_length(128)
+  !$acc end parallel
+
+!CHECK:      [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
+!CHECK:      acc.parallel vector_length([[VECTORLENGTH1]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel vector_length(vectorLength)
+  !$acc end parallel
+
+!CHECK:      [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK:      acc.parallel vector_length([[VECTORLENGTH2]]: i32) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel if(.TRUE.)
+  !$acc end parallel
+
+!CHECK:      [[IF1:%.*]] = arith.constant true
+!CHECK:      acc.parallel if([[IF1]]) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel if(ifCondition)
+  !$acc end parallel
+
+!CHECK:      [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
+!CHECK:      [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
+!CHECK:      acc.parallel if([[IF2]]) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel self(.TRUE.)
+  !$acc end parallel
+
+!CHECK:      [[SELF1:%.*]] = arith.constant true
+!CHECK:      acc.parallel self([[SELF1]]) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel self
+  !$acc end parallel
+
+!CHECK:      acc.parallel {
+!CHECK:        acc.yield
+!CHECK-NEXT: } attributes {selfAttr}
+  ! NOTE(review): unlike if(ifCondition), no fir.load is checked; fir.convert takes the !fir.ref - confirm.
+  !$acc parallel self(ifCondition)
+  !$acc end parallel
+
+!CHECK:      [[SELF2:%.*]] = fir.convert [[IFCONDITION]] : (!fir.ref<!fir.logical<4>>) -> i1
+!CHECK:      acc.parallel self([[SELF2]]) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel copy(a, b, c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel copy([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel copy(a) copy(b) copy(c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel copy([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel copyin(a) copyin(readonly: b, c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel copyin([[A]]: !fir.ref<!fir.array<10x10xf32>>) copyin_readonly([[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+  ! Repeated clauses are expected merged per kind: a and c under copyout, b under copyout_zero.
+  !$acc parallel copyout(a) copyout(zero: b) copyout(c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel copyout([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) copyout_zero([[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel create(a, b) create(zero: c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel create([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>) create_zero([[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+  ! The expected operand-group order (create_zero before no_create) differs from the clause order written here.
+  !$acc parallel no_create(a, b) create(zero: c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel create_zero([[C]]: !fir.ref<!fir.array<10x10xf32>>) no_create([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel present(a, b, c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel present([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel deviceptr(a) deviceptr(c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel deviceptr([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel attach(d, e)
+  !$acc end parallel
+
+!CHECK:      acc.parallel attach([[D]]: !fir.ref<!fir.box<!fir.ptr<f32>>>, [[E]]: !fir.ref<!fir.box<!fir.ptr<f32>>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel private(a) firstprivate(b) private(c)
+  !$acc end parallel
+
+!CHECK:      acc.parallel private([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) firstprivate([[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK:        acc.yield
+!CHECK-NEXT: }{{$}}
+
+end subroutine acc_parallel


        


More information about the flang-commits mailing list