[flang-commits] [mlir] [flang] [flang][openacc] Lower loop directive to the new acc.loop op design (PR #65417)

Wed Nov 29 14:11:22 PST 2023

https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/65417

>From fd3e884f022d560453f7c1d30773429af2079c79 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 29 Aug 2023 14:48:49 -0700
Subject: [PATCH 1/4] [mlir][openacc] Update acc.loop to be a proper loop like
 operation

The initial design of the `acc.loop` was to be an operation that encapsulates
a loop like operation. This was an early design and we now want to change it
so the `acc.loop` operation becomes a real loop-like operation by implementing
the LoopLikeInterface.

Differential Revision: https://reviews.llvm.org/D159229
---
 mlir/include/mlir/Dialect/OpenACC/OpenACC.h   |   1 +
 .../mlir/Dialect/OpenACC/OpenACCOps.td        |  77 ++---
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       |  78 ++++-
 mlir/test/Dialect/OpenACC/canonicalize.mlir   |  10 +-
 mlir/test/Dialect/OpenACC/invalid.mlir        |  88 +++---
 mlir/test/Dialect/OpenACC/ops.mlir            | 278 ++++++++----------
 6 files changed, 300 insertions(+), 232 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index 4dc94782c1c9b54..989b8ba3b78f334 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -25,6 +25,7 @@
 #include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.h.inc"
 #include "mlir/Dialect/OpenACCMPCommon/Interfaces/AtomicInterfaces.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
 #define GET_TYPEDEF_CLASSES
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 391e77e0c4081a3..b52814c974ded12 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -14,6 +14,7 @@
 #define OPENACC_OPS
 
 include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/IR/BuiltinTypes.td"
 include "mlir/IR/EnumAttr.td"
@@ -1161,47 +1162,53 @@ def OpenACC_HostDataOp : OpenACC_Op<"host_data", [AttrSizedOperandSegments]> {
 //===----------------------------------------------------------------------===//
 
 def OpenACC_LoopOp : OpenACC_Op<"loop",
-    [AttrSizedOperandSegments, RecursiveMemoryEffects]> {
+    [AttrSizedOperandSegments, RecursiveMemoryEffects,
+     DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
   let summary = "loop construct";
 
   let description = [{
-    The "acc.loop" operation represents the OpenACC loop construct.
+    The "acc.loop" operation represents the OpenACC loop construct. The lower
+    and upper bounds specify a half-open range: the range includes the lower
+    bound but does not include the upper bound. If the `inclusive` attribute is
+    set then the upper bound is included.
 
     Example:
 
     ```mlir
-    acc.loop gang vector {
-      scf.for %arg3 = %c0 to %c10 step %c1 {
-        scf.for %arg4 = %c0 to %c10 step %c1 {
-          scf.for %arg5 = %c0 to %c10 step %c1 {
-            // ... body
-          }
-        }
-      }
+    acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = 
+        (%c0, %c0, %c0 : index, index, index) to 
+        (%c10, %c10, %c10 : index, index, index) step 
+        (%c1, %c1, %c1 : index, index, index) {
+      // Loop body
       acc.yield
     } attributes { collapse = 3 }
     ```
   }];
 
-  let arguments = (ins OptionalAttr<I64Attr>:$collapse,
-      Optional<IntOrIndex>:$gangNum,
-      Optional<IntOrIndex>:$gangDim,
-      Optional<IntOrIndex>:$gangStatic,
-      Optional<IntOrIndex>:$workerNum,
-      Optional<IntOrIndex>:$vectorLength,
-      UnitAttr:$seq,
-      UnitAttr:$independent,
-      UnitAttr:$auto_,
-      UnitAttr:$hasGang,
-      UnitAttr:$hasWorker,
-      UnitAttr:$hasVector,
-      Variadic<IntOrIndex>:$tileOperands,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$cacheOperands,
-      Variadic<OpenACC_PointerLikeTypeInterface>:$privateOperands,
-      OptionalAttr<SymbolRefArrayAttr>:$privatizations,
-      Variadic<AnyType>:$reductionOperands,
-      OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes
-      );
+  let arguments = (ins
+    Variadic<IntOrIndex>:$lowerbound,
+    Variadic<IntOrIndex>:$upperbound,
+    Variadic<IntOrIndex>:$step,
+    OptionalAttr<DenseBoolArrayAttr>:$inclusiveUpperbound,
+    OptionalAttr<I64Attr>:$collapse,
+    Optional<IntOrIndex>:$gangNum,
+    Optional<IntOrIndex>:$gangDim,
+    Optional<IntOrIndex>:$gangStatic,
+    Optional<IntOrIndex>:$workerNum,
+    Optional<IntOrIndex>:$vectorLength,
+    UnitAttr:$seq,
+    UnitAttr:$independent,
+    UnitAttr:$auto_,
+    UnitAttr:$hasGang,
+    UnitAttr:$hasWorker,
+    UnitAttr:$hasVector,
+    Variadic<IntOrIndex>:$tileOperands,
+    Variadic<OpenACC_PointerLikeTypeInterface>:$cacheOperands,
+    Variadic<OpenACC_PointerLikeTypeInterface>:$privateOperands,
+    OptionalAttr<SymbolRefArrayAttr>:$privatizations,
+    Variadic<AnyType>:$reductionOperands,
+    OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes
+  );
 
   let results = (outs Variadic<AnyType>:$results);
 
@@ -1223,9 +1230,12 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
   let hasCustomAssemblyFormat = 1;
   let assemblyFormat = [{
     oilist(
-        `gang` `` custom<GangClause>($gangNum, type($gangNum), $gangDim, type($gangDim), $gangStatic, type($gangStatic), $hasGang)
-      | `worker` `` custom<WorkerClause>($workerNum, type($workerNum), $hasWorker)
-      | `vector` `` custom<VectorClause>($vectorLength, type($vectorLength), $hasVector)
+        `gang` `` custom<GangClause>($gangNum, type($gangNum), $gangDim,
+            type($gangDim), $gangStatic, type($gangStatic), $hasGang)
+      | `worker` ``
+          custom<WorkerClause>($workerNum, type($workerNum), $hasWorker)
+      | `vector` ``
+          custom<VectorClause>($vectorLength, type($vectorLength), $hasVector)
       | `private` `(` custom<SymOperandList>(
             $privateOperands, type($privateOperands), $privatizations)
         `)`
@@ -1235,7 +1245,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
         `)`
       | `cache` `(` $cacheOperands `:` type($cacheOperands) `)`
     )
-    $region
+    custom<LoopControl>($region, $lowerbound, type($lowerbound), $upperbound,
+        type($upperbound), $step, type($step))
     ( `(` type($results)^ `)` )?
     attr-dict-with-keyword
   }];
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 08e83cad482207d..86a4747f1e86c19 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -745,6 +745,9 @@ static ParseResult parseGangClause(
 
   // optional gang operands
   if (succeeded(parser.parseOptionalLParen())) {
+    // Allow empty parenthesis.
+    if (succeeded(parser.parseOptionalRParen()))
+      return success();
     while (true) {
       bool newValue = false;
       bool needValue = false;
@@ -791,8 +794,8 @@ static ParseResult parseGangClause(
 void printGangClause(OpAsmPrinter &p, Operation *op, Value gangNum,
                      Type gangNumType, Value gangDim, Type gangDimType,
                      Value gangStatic, Type gangStaticType, UnitAttr hasGang) {
+  p << "(";
   if (gangNum || gangStatic || gangDim) {
-    p << "(";
     if (gangNum) {
       p << LoopOp::getGangNumKeyword() << "=" << gangNum << " : "
         << gangNumType;
@@ -808,8 +811,8 @@ void printGangClause(OpAsmPrinter &p, Operation *op, Value gangNum,
     if (gangStatic)
       p << LoopOp::getGangStaticKeyword() << "=" << gangStatic << " : "
         << gangStaticType;
-    p << ")";
   }
+  p << ")";
 }
 
 static ParseResult
@@ -818,6 +821,9 @@ parseWorkerClause(OpAsmParser &parser,
                   Type &workerNumType, UnitAttr &hasWorker) {
   hasWorker = UnitAttr::get(parser.getBuilder().getContext());
   if (succeeded(parser.parseOptionalLParen())) {
+    // Allow empty parenthesis.
+    if (succeeded(parser.parseOptionalRParen()))
+      return success();
     workerNum = OpAsmParser::UnresolvedOperand{};
     if (parser.parseOperand(*workerNum) ||
         parser.parseColonType(workerNumType) || parser.parseRParen())
@@ -828,8 +834,10 @@ parseWorkerClause(OpAsmParser &parser,
 
 void printWorkerClause(OpAsmPrinter &p, Operation *op, Value workerNum,
                        Type workerNumType, UnitAttr hasWorker) {
+  p << "(";
   if (workerNum)
-    p << "(" << workerNum << " : " << workerNumType << ")";
+    p << workerNum << " : " << workerNumType;
+  p << ")";
 }
 
 static ParseResult
@@ -838,6 +846,9 @@ parseVectorClause(OpAsmParser &parser,
                   Type &vectorLengthType, UnitAttr &hasVector) {
   hasVector = UnitAttr::get(parser.getBuilder().getContext());
   if (succeeded(parser.parseOptionalLParen())) {
+    // Allow empty parenthesis.
+    if (succeeded(parser.parseOptionalRParen()))
+      return success();
     vectorLength = OpAsmParser::UnresolvedOperand{};
     if (parser.parseOperand(*vectorLength) ||
         parser.parseColonType(vectorLengthType) || parser.parseRParen())
@@ -848,11 +859,18 @@ parseVectorClause(OpAsmParser &parser,
 
 void printVectorClause(OpAsmPrinter &p, Operation *op, Value vectorLength,
                        Type vectorLengthType, UnitAttr hasVector) {
+  p << "(";
   if (vectorLength)
-    p << "(" << vectorLength << " : " << vectorLengthType << ")";
+    p << vectorLength << " : " << vectorLengthType;
+  p << ")";
 }
 
 LogicalResult acc::LoopOp::verify() {
+  if (!getUpperbound().empty() &&
+      (getUpperbound().size() != getInclusiveUpperbound()->size()))
+    return emitError() << "inclusiveUpperbound size is expected to be the same"
+                       << " as upperbound size";
+
   // auto, independent and seq attribute are mutually exclusive.
   if ((getAuto_() && (getIndependent() || getSeq())) ||
       (getIndependent() && getSeq())) {
@@ -898,6 +916,58 @@ Value LoopOp::getDataOperand(unsigned i) {
   return getOperand(numOptional + i);
 }
 
+llvm::SmallVector<mlir::Region *> acc::LoopOp::getLoopRegions() {
+  return {&getRegion()};
+}
+
+/// loop-control ::= `(` ssa-id-and-type-list `)` `=` `(` ssa-id-and-type-list
+/// `)` `to` `(` ssa-id-and-type-list `)` `step` `(` ssa-id-and-type-list `)`
+ParseResult
+parseLoopControl(OpAsmParser &parser, Region &region,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerbound,
+                 SmallVectorImpl<Type> &lowerboundType,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperbound,
+                 SmallVectorImpl<Type> &upperboundType,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &step,
+                 SmallVectorImpl<Type> &stepType) {
+
+  SmallVector<OpAsmParser::Argument> inductionVars;
+  if (succeeded(parser.parseOptionalLParen())) {
+    if (parser.parseArgumentList(inductionVars, OpAsmParser::Delimiter::None,
+                                 /*allowType=*/true) ||
+        parser.parseRParen() || parser.parseEqual() || parser.parseLParen() ||
+        parser.parseOperandList(lowerbound, inductionVars.size(),
+                                OpAsmParser::Delimiter::None) ||
+        parser.parseColonTypeList(lowerboundType) || parser.parseRParen() ||
+        parser.parseKeyword("to") || parser.parseLParen() ||
+        parser.parseOperandList(upperbound, inductionVars.size(),
+                                OpAsmParser::Delimiter::None) ||
+        parser.parseColonTypeList(upperboundType) || parser.parseRParen() ||
+        parser.parseKeyword("step") || parser.parseLParen() ||
+        parser.parseOperandList(step, inductionVars.size(),
+                                OpAsmParser::Delimiter::None) ||
+        parser.parseColonTypeList(stepType) || parser.parseRParen())
+      return failure();
+  }
+  return parser.parseRegion(region, inductionVars);
+}
+
+void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
+                      ValueRange lowerbound, TypeRange lowerboundType,
+                      ValueRange upperbound, TypeRange upperboundType,
+                      ValueRange steps, TypeRange stepType) {
+  ValueRange regionArgs = region.front().getArguments();
+  if (!regionArgs.empty()) {
+    p << "(";
+    llvm::interleaveComma(regionArgs, p,
+                          [&p](Value v) { p << v << " : " << v.getType(); });
+    p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
+      << upperbound << " : " << upperboundType << ") "
+      << " step (" << steps << " : " << stepType << ") ";
+  }
+  p.printRegion(region, /*printEntryBlockArgs=*/false);
+}
+
 //===----------------------------------------------------------------------===//
 // DataOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenACC/canonicalize.mlir b/mlir/test/Dialect/OpenACC/canonicalize.mlir
index 6173ab6699c6c58..4522ffb252a62e3 100644
--- a/mlir/test/Dialect/OpenACC/canonicalize.mlir
+++ b/mlir/test/Dialect/OpenACC/canonicalize.mlir
@@ -110,14 +110,16 @@ func.func @testupdateop(%a: memref<f32>, %ifCond: i1) -> () {
 
 func.func @testhostdataop(%a: memref<f32>, %ifCond: i1) -> () {
   %0 = acc.use_device varPtr(%a : memref<f32>) -> memref<f32>
+  %1 = arith.constant 1 : i32
+  %2 = arith.constant 10 : i32
   %false = arith.constant false
   acc.host_data dataOperands(%0 : memref<f32>) if(%false) {
-    acc.loop {
+    acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
       acc.yield
-    }
-    acc.loop {
+    } attributes { inclusiveUpperbound = array<i1: true> }
+    acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
       acc.yield
-    }
+    } attributes { inclusiveUpperbound = array<i1: true> }
     acc.terminator
   }
   return
diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir
index b9ac68d0592c872..a6b99a54b4c142f 100644
--- a/mlir/test/Dialect/OpenACC/invalid.mlir
+++ b/mlir/test/Dialect/OpenACC/invalid.mlir
@@ -1,71 +1,81 @@
 // RUN: mlir-opt -split-input-file -verify-diagnostics %s
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop gang {
+acc.loop gang() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
-} attributes {seq}
+} attributes {seq, inclusiveUpperbound = array<i1: true>}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop worker {
+acc.loop worker() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
-} attributes {seq}
+} attributes {seq, inclusiveUpperbound = array<i1: true>}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop vector {
+acc.loop vector() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
-} attributes {seq}
+} attributes {seq, inclusiveUpperbound = array<i1: true>}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop gang worker {
+acc.loop gang() worker() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
-} attributes {seq}
+} attributes {seq, inclusiveUpperbound = array<i1: true>}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop gang vector {
+acc.loop gang() vector() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
-} attributes {seq}
+} attributes {seq, inclusiveUpperbound = array<i1: true>}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop worker vector {
+acc.loop worker() vector() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
-} attributes {seq}
+} attributes {seq, inclusiveUpperbound = array<i1: true>}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop gang worker vector {
+acc.loop gang() worker() vector() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
-} attributes {seq}
-
-// -----
-
-// expected-error at +1 {{expected non-empty body.}}
-acc.loop {
-}
+} attributes {seq, inclusiveUpperbound = array<i1: true>}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{only one of "auto", "independent", "seq" can be present at the same time}}
-acc.loop {
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   acc.yield
-} attributes {auto_, seq}
+} attributes {auto_, seq, inclusiveUpperbound = array<i1: true>}
 
 // -----
 
@@ -133,11 +143,13 @@ acc.parallel {
 
 // -----
 
-acc.loop {
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32){
 // expected-error at +1 {{'acc.init' op cannot be nested in a compute operation}}
   acc.init
   acc.yield
-}
+} attributes {inclusiveUpperbound = array<i1: true>}
 
 // -----
 
@@ -149,21 +161,25 @@ acc.parallel {
 
 // -----
 
-acc.loop {
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
 // expected-error at +1 {{'acc.shutdown' op cannot be nested in a compute operation}}
   acc.shutdown
   acc.yield
-}
+} attributes {inclusiveUpperbound = array<i1: true>}
 
 // -----
 
-acc.loop {
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() ({
     // expected-error at +1 {{'acc.shutdown' op cannot be nested in a compute operation}}
     acc.shutdown
   }) : () -> ()
   acc.yield
-}
+} attributes {inclusiveUpperbound = array<i1: true>}
 
 // -----
 
@@ -367,8 +383,10 @@ acc.firstprivate.recipe @privatization_i32 : i32 init {
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{expected ')'}}
-acc.loop gang(static=%i64Value: i64, num=%i64Value: i64 {
+acc.loop gang(static=%i64Value: i64, num=%i64Value: i64 (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 }
@@ -436,8 +454,10 @@ acc.reduction.recipe @reduction_i64 : i64 reduction_operator<add> init {
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error at +1 {{new value expected after comma}}
-acc.loop gang(static=%i64Value: i64, ) {
+acc.loop gang(static=%i64Value: i64, ) (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 }
@@ -453,14 +473,6 @@ func.func @fct1(%0 : !llvm.ptr) -> () {
 
 // -----
 
-// expected-error at +1 {{expect at least one of num, dim or static values}}
-acc.loop gang() {
-  "test.openacc_dummy_op"() : () -> ()
-  acc.yield
-}
-
-// -----
-
 %i64value = arith.constant 1 : i64
 // expected-error at +1 {{num_gangs expects a maximum of 3 values}}
 acc.parallel num_gangs(%i64value, %i64value, %i64value, %i64value : i64, i64, i64, i64) {
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index c06e5a596923f94..69d11f8bb2cce13 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -11,52 +11,40 @@ func.func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
   %async = arith.constant 1 : i64
 
   acc.parallel async(%async: i64) {
-    acc.loop gang vector {
-      scf.for %arg3 = %c0 to %c10 step %c1 {
-        scf.for %arg4 = %c0 to %c10 step %c1 {
-          scf.for %arg5 = %c0 to %c10 step %c1 {
-            %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32>
-            %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32>
-            %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32>
-            %p = arith.mulf %a, %b : f32
-            %co = arith.addf %cij, %p : f32
-            memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32>
-          }
-        }
-      }
+    acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = (%c0, %c0, %c0 : index, index, index) to (%c10, %c10, %c10 : index, index, index) step (%c1, %c1, %c1 : index, index, index) {
+      %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32>
+      %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32>
+      %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32>
+      %p = arith.mulf %a, %b : f32
+      %co = arith.addf %cij, %p : f32
+      memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32>
       acc.yield
-    } attributes { collapse = 3 }
+    } attributes { collapse = 3, inclusiveUpperbound = array<i1: true, true, true> }
     acc.yield
   }
 
   return %C : memref<10x10xf32>
 }
 
-// CHECK-LABEL: func @compute1(
+// CHECK-LABEL: func @compute1
 //  CHECK-NEXT:   %{{.*}} = arith.constant 0 : index
 //  CHECK-NEXT:   %{{.*}} = arith.constant 10 : index
 //  CHECK-NEXT:   %{{.*}} = arith.constant 1 : index
 //  CHECK-NEXT:   [[ASYNC:%.*]] = arith.constant 1 : i64
 //  CHECK-NEXT:   acc.parallel async([[ASYNC]] : i64) {
-//  CHECK-NEXT:     acc.loop gang vector {
-//  CHECK-NEXT:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:         scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:           scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32
-//  CHECK-NEXT:             %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
-//  CHECK-NEXT:             memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:           }
-//  CHECK-NEXT:         }
-//  CHECK-NEXT:       }
+//  CHECK-NEXT:     acc.loop gang() vector() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
+//  CHECK-NEXT:       %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:       %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:       %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:       %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32
+//  CHECK-NEXT:       %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
+//  CHECK-NEXT:       memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
 //  CHECK-NEXT:       acc.yield
-//  CHECK-NEXT:     } attributes {collapse = 3 : i64}
+//  CHECK-NEXT:     } attributes {collapse = 3 : i64, inclusiveUpperbound = array<i1: true, true, true>}
 //  CHECK-NEXT:     acc.yield
 //  CHECK-NEXT:   }
 //  CHECK-NEXT:   return %{{.*}} : memref<10x10xf32>
-//  CHECK-NEXT: }
+
 
 // -----
 
@@ -66,21 +54,19 @@ func.func @compute2(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
   %c1 = arith.constant 1 : index
 
   acc.parallel {
-    acc.loop {
-      scf.for %arg3 = %c0 to %c10 step %c1 {
-        scf.for %arg4 = %c0 to %c10 step %c1 {
-          scf.for %arg5 = %c0 to %c10 step %c1 {
-            %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32>
-            %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32>
-            %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32>
-            %p = arith.mulf %a, %b : f32
-            %co = arith.addf %cij, %p : f32
-            memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32>
-          }
+    acc.loop (%arg3 : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
+      scf.for %arg4 = %c0 to %c10 step %c1 {
+        scf.for %arg5 = %c0 to %c10 step %c1 {
+          %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32>
+          %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32>
+          %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32>
+          %p = arith.mulf %a, %b : f32
+          %co = arith.addf %cij, %p : f32
+          memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32>
         }
       }
       acc.yield
-    } attributes {seq}
+    } attributes { seq, inclusiveUpperbound = array<i1: true> }
     acc.yield
   }
 
@@ -92,8 +78,7 @@ func.func @compute2(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
 //  CHECK-NEXT:   %{{.*}} = arith.constant 10 : index
 //  CHECK-NEXT:   %{{.*}} = arith.constant 1 : index
 //  CHECK-NEXT:   acc.parallel {
-//  CHECK-NEXT:     acc.loop {
-//  CHECK-NEXT:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+//  CHECK-NEXT:     acc.loop  (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
 //  CHECK-NEXT:         scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //  CHECK-NEXT:           scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
@@ -104,9 +89,8 @@ func.func @compute2(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
 //  CHECK-NEXT:             memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
 //  CHECK-NEXT:           }
 //  CHECK-NEXT:         }
-//  CHECK-NEXT:       }
 //  CHECK-NEXT:       acc.yield
-//  CHECK-NEXT:     } attributes {seq}
+//  CHECK-NEXT:     } attributes {inclusiveUpperbound = array<i1: true>, seq}
 //  CHECK-NEXT:     acc.yield
 //  CHECK-NEXT:   }
 //  CHECK-NEXT:   return %{{.*}} : memref<10x10xf32>
@@ -138,32 +122,25 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
   acc.data dataOperands(%pa, %pb, %pc, %pd: memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) {
     %private = acc.private varPtr(%c : memref<10xf32>) -> memref<10xf32>
     acc.parallel num_gangs(%numGangs: i64) num_workers(%numWorkers: i64) private(@privatization_memref_10_f32 -> %private : memref<10xf32>) {
-      acc.loop gang {
-        scf.for %x = %lb to %c10 step %st {
-          acc.loop worker {
-            scf.for %y = %lb to %c10 step %st {
-              %axy = memref.load %a[%x, %y] : memref<10x10xf32>
-              %bxy = memref.load %b[%x, %y] : memref<10x10xf32>
-              %tmp = arith.addf %axy, %bxy : f32
-              memref.store %tmp, %c[%y] : memref<10xf32>
-            }
-            acc.yield
-          }
-
-          acc.loop {
-            // for i = 0 to 10 step 1
-            //   d[x] += c[i]
-            scf.for %i = %lb to %c10 step %st {
-              %ci = memref.load %c[%i] : memref<10xf32>
-              %dx = memref.load %d[%x] : memref<10xf32>
-              %z = arith.addf %ci, %dx : f32
-              memref.store %z, %d[%x] : memref<10xf32>
-            }
-            acc.yield
-          } attributes {seq}
-        }
+      acc.loop gang() (%x : index) = (%lb : index) to (%c10 : index) step (%st : index) {
+        acc.loop worker() (%y : index) = (%lb : index) to (%c10 : index) step (%st : index) {
+          %axy = memref.load %a[%x, %y] : memref<10x10xf32>
+          %bxy = memref.load %b[%x, %y] : memref<10x10xf32>
+          %tmp = arith.addf %axy, %bxy : f32
+          memref.store %tmp, %c[%y] : memref<10xf32>
+          acc.yield
+        } attributes {inclusiveUpperbound = array<i1: true>}
+        acc.loop (%i : index) = (%lb : index) to (%c10 : index) step (%st : index) {
+          // for i = 0 to 10 step 1
+          //   d[x] += c[i]
+          %ci = memref.load %c[%i] : memref<10xf32>
+          %dx = memref.load %d[%x] : memref<10xf32>
+          %z = arith.addf %ci, %dx : f32
+          memref.store %z, %d[%x] : memref<10xf32>
+          acc.yield
+        } attributes {seq, inclusiveUpperbound = array<i1: true>}
         acc.yield
-      }
+      } attributes {inclusiveUpperbound = array<i1: true>}
       acc.yield
     }
     acc.terminator
@@ -181,27 +158,21 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
 // CHECK:        acc.data dataOperands(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) {
 // CHECK-NEXT:     %[[P_ARG2:.*]] = acc.private varPtr([[ARG2]] : memref<10xf32>) -> memref<10xf32> 
 // CHECK-NEXT:     acc.parallel num_gangs([[NUMGANG]] : i64) num_workers([[NUMWORKERS]] : i64) private(@privatization_memref_10_f32 -> %[[P_ARG2]] : memref<10xf32>) {
-// CHECK-NEXT:       acc.loop gang {
-// CHECK-NEXT:         scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
-// CHECK-NEXT:           acc.loop worker {
-// CHECK-NEXT:             scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
+// CHECK-NEXT:       acc.loop gang() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+// CHECK-NEXT:           acc.loop worker() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
 // CHECK-NEXT:               %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
 // CHECK-NEXT:               %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
 // CHECK-NEXT:               %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
 // CHECK-NEXT:               memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
-// CHECK-NEXT:             }
 // CHECK-NEXT:             acc.yield
 // CHECK-NEXT:           }
-// CHECK-NEXT:           acc.loop {
-// CHECK-NEXT:             scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
+// CHECK-NEXT:           acc.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
 // CHECK-NEXT:               %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32>
 // CHECK-NEXT:               %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32>
 // CHECK-NEXT:               %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
 // CHECK-NEXT:               memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
-// CHECK-NEXT:             }
 // CHECK-NEXT:             acc.yield
-// CHECK-NEXT:           } attributes {seq}
-// CHECK-NEXT:         }
+// CHECK-NEXT:           } attributes {inclusiveUpperbound = array<i1: true>, seq}
 // CHECK-NEXT:         acc.yield
 // CHECK-NEXT:       }
 // CHECK-NEXT:       acc.yield
@@ -217,68 +188,75 @@ func.func @testloopop(%a : memref<10xf32>) -> () {
   %i64Value = arith.constant 1 : i64
   %i32Value = arith.constant 128 : i32
   %idxValue = arith.constant 8 : index
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
 
-  acc.loop gang worker vector {
+  acc.loop gang() worker() vector() (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang(num=%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang(num=%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang(static=%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang(static=%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop worker(%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop worker(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop worker(%i32Value: i32) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop worker(%i32Value: i32) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop worker(%idxValue: index) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop worker(%idxValue: index) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop vector(%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop vector(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop vector(%i32Value: i32) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop vector(%i32Value: i32) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop vector(%idxValue: index) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop vector(%idxValue: index) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang(num=%i64Value: i64) worker vector {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang(num=%i64Value: i64) worker() vector() (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang(num=%i64Value: i64, static=%i64Value: i64) worker(%i64Value: i64) vector(%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang(num=%i64Value: i64, static=%i64Value: i64) worker(%i64Value: i64) vector(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang(num=%i32Value: i32, static=%idxValue: index) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang(num=%i32Value: i32, static=%idxValue: index) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop tile(%i64Value, %i64Value : i64, i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop tile(%i64Value, %i64Value : i64, i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop tile(%i32Value, %i32Value : i32, i32) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop tile(%i32Value, %i32Value : i32, i32) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang(static=%i64Value: i64, num=%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang(static=%i64Value: i64, num=%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang(dim=%i64Value : i64, static=%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang(dim=%i64Value : i64, static=%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
+    "test.openacc_dummy_op"() : () -> ()
+    acc.yield
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
   }
@@ -293,67 +271,55 @@ func.func @testloopop(%a : memref<10xf32>) -> () {
 // CHECK:      [[I64VALUE:%.*]] = arith.constant 1 : i64
 // CHECK-NEXT: [[I32VALUE:%.*]] = arith.constant 128 : i32
 // CHECK-NEXT: [[IDXVALUE:%.*]] = arith.constant 8 : index
-// CHECK:      acc.loop gang worker vector {
+// CHECK:      acc.loop gang() worker() vector()
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang(num=[[I64VALUE]] : i64) {
+// CHECK:      acc.loop gang(num=[[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang(static=[[I64VALUE]] : i64) {
+// CHECK:      acc.loop gang(static=[[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop worker([[I64VALUE]] : i64) {
+// CHECK:      acc.loop worker([[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop worker([[I32VALUE]] : i32) {
+// CHECK:      acc.loop worker([[I32VALUE]] : i32)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop worker([[IDXVALUE]] : index) {
+// CHECK:      acc.loop worker([[IDXVALUE]] : index)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop vector([[I64VALUE]] : i64) {
+// CHECK:      acc.loop vector([[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop vector([[I32VALUE]] : i32) {
+// CHECK:      acc.loop vector([[I32VALUE]] : i32)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop vector([[IDXVALUE]] : index) {
+// CHECK:      acc.loop vector([[IDXVALUE]] : index)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang(num=[[I64VALUE]] : i64) worker vector {
+// CHECK:      acc.loop gang(num=[[I64VALUE]] : i64) worker() vector()
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang(num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) worker([[I64VALUE]] : i64) vector([[I64VALUE]] : i64) {
+// CHECK:      acc.loop gang(num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) worker([[I64VALUE]] : i64) vector([[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang(num=[[I32VALUE]] : i32, static=[[IDXVALUE]] : index) {
+// CHECK:      acc.loop gang(num=[[I32VALUE]] : i32, static=[[IDXVALUE]] : index)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop tile([[I64VALUE]], [[I64VALUE]] : i64, i64) {
+// CHECK:      acc.loop tile([[I64VALUE]], [[I64VALUE]] : i64, i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop tile([[I32VALUE]], [[I32VALUE]] : i32, i32) {
+// CHECK:      acc.loop tile([[I32VALUE]], [[I32VALUE]] : i32, i32)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang(num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) {
+// CHECK:      acc.loop gang(num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang(dim=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) {
+// CHECK:      acc.loop gang(dim=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64)
+// CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
+// CHECK-NEXT:   acc.yield
+// CHECK:      acc.loop {
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
 // CHECK-NEXT: }
@@ -366,22 +332,25 @@ func.func @testloopop(%a : memref<10xf32>) -> () {
 // -----
 
 func.func @acc_loop_multiple_block() {
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
   acc.parallel {
-    acc.loop {
-      %c1 = arith.constant 1 : index
-      cf.br ^bb1(%c1 : index)
+    acc.loop  (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
+      %c1_1 = arith.constant 1 : index
+      cf.br ^bb1(%c1_1 : index)
     ^bb1(%9: index):
-      %c0 = arith.constant 0 : index
-      %12 = arith.cmpi sgt, %9, %c0 : index
+      %c0_1 = arith.constant 0 : index
+      %12 = arith.cmpi sgt, %9, %c0_1 : index
       cf.cond_br %12, ^bb2, ^bb3
     ^bb2:
       %c1_0 = arith.constant 1 : index
-      %c10 = arith.constant 10 : index
-      %22 = arith.subi %c10, %c1_0 : index
+      %c10_1 = arith.constant 10 : index
+      %22 = arith.subi %c10_1, %c1_0 : index
       cf.br ^bb1(%22 : index)
     ^bb3:
       acc.yield
-    }
+    } attributes {inclusiveUpperbound = array<i1: true>}
     acc.yield
   }
   return
@@ -1509,10 +1478,13 @@ acc.reduction.recipe @reduction_add_i64 : i64 reduction_operator<add> init {
 // CHECK:       }
 
 func.func @acc_reduc_test(%a : i64) -> () {
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
   acc.parallel reduction(@reduction_add_i64 -> %a : i64) {
-    acc.loop reduction(@reduction_add_i64 -> %a : i64) {
+    acc.loop reduction(@reduction_add_i64 -> %a : i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
       acc.yield
-    }
+    } attributes { inclusiveUpperbound = array<i1: true> }
     acc.yield
   }
   return

>From bc123ca98eeaeada4225327d524d6a7ddb700133 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 1 Sep 2023 11:10:59 -0700
Subject: [PATCH 2/4] [flang][openacc] Lower loop directive to the new acc.loop
 op design

acc.loop was redesigned in https://reviews.llvm.org/D159229. This patch
updates the lowering to match the new op.

DO CONCURRENT construct will be added in a follow up patch.
---
 flang/include/flang/Lower/OpenACC.h           |   5 +
 flang/lib/Lower/Bridge.cpp                    |  19 +-
 flang/lib/Lower/OpenACC.cpp                   | 147 ++++++++++--
 flang/test/Lower/OpenACC/acc-kernels-loop.f90 | 154 +++++--------
 flang/test/Lower/OpenACC/acc-loop.f90         | 213 ++++++++----------
 .../test/Lower/OpenACC/acc-parallel-loop.f90  | 180 ++++++---------
 flang/test/Lower/OpenACC/acc-private.f90      |  14 +-
 flang/test/Lower/OpenACC/acc-reduction.f90    |  45 ++--
 flang/test/Lower/OpenACC/acc-serial-loop.f90  | 138 ++++--------
 9 files changed, 437 insertions(+), 478 deletions(-)

diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index 409956f0ecb309f..da601c456a1fb2e 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -35,6 +35,7 @@ class FirOpBuilder;
 
 namespace Fortran {
 namespace parser {
+struct AccClauseList;
 struct OpenACCConstruct;
 struct OpenACCDeclarativeConstruct;
 struct OpenACCRoutineConstruct;
@@ -64,6 +65,8 @@ static constexpr llvm::StringRef declarePreDeallocSuffix =
 static constexpr llvm::StringRef declarePostDeallocSuffix =
     "_acc_declare_update_desc_post_dealloc";
 
+static constexpr llvm::StringRef privatizationRecipePrefix = "privatization";
+
 void genOpenACCConstruct(AbstractConverter &,
                          Fortran::semantics::SemanticsContext &,
                          pft::Evaluation &, const parser::OpenACCConstruct &);
@@ -112,6 +115,8 @@ void attachDeclarePostDeallocAction(AbstractConverter &, fir::FirOpBuilder &,
 void genOpenACCTerminator(fir::FirOpBuilder &, mlir::Operation *,
                           mlir::Location);
 
+int64_t getCollapseValue(const Fortran::parser::AccClauseList &);
+
 } // namespace lower
 } // namespace Fortran
 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 0090c2f1212164a..fcb5f275c8ffa02 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2383,7 +2383,24 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint();
     localSymbols.pushScope();
     genOpenACCConstruct(*this, bridge.getSemanticsContext(), getEval(), acc);
-    for (Fortran::lower::pft::Evaluation &e : getEval().getNestedEvaluations())
+
+    const Fortran::parser::OpenACCLoopConstruct *accLoop =
+        std::get_if<Fortran::parser::OpenACCLoopConstruct>(&acc.u);
+
+    Fortran::lower::pft::Evaluation *curEval = &getEval();
+
+    if (accLoop) {
+      const Fortran::parser::AccBeginLoopDirective &beginLoopDir =
+          std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t);
+      const Fortran::parser::AccClauseList &clauseList =
+          std::get<Fortran::parser::AccClauseList>(beginLoopDir.t);
+      int64_t collapseValue = Fortran::lower::getCollapseValue(clauseList);
+      curEval = &curEval->getFirstNestedEvaluation();
+      for (int64_t i = 1; i < collapseValue; i++)
+        curEval = &*std::next(curEval->getNestedEvaluations().begin());
+    }
+
+    for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations())
       genFIR(e);
     localSymbols.popScope();
     builder->restoreInsertionPoint(insertPt);
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 8c6c22210cf0894..e3451688f9af51c 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -754,7 +754,8 @@ genPrivatizations(const Fortran::parser::AccObjectList &objectList,
     mlir::Type retTy = getTypeFromBounds(bounds, baseAddr.getType());
     if constexpr (std::is_same_v<RecipeOp, mlir::acc::PrivateRecipeOp>) {
       std::string recipeName =
-          fir::getTypeAsString(retTy, converter.getKindMap(), "privatization");
+          fir::getTypeAsString(retTy, converter.getKindMap(),
+                               Fortran::lower::privatizationRecipePrefix);
       recipe = Fortran::lower::createOrGetPrivateRecipe(builder, recipeName,
                                                         operandLocation, retTy);
       auto op = createDataEntryOp<mlir::acc::PrivateOp>(
@@ -1381,10 +1382,12 @@ static Op createRegionOp(fir::FirOpBuilder &builder, mlir::Location loc,
                          Fortran::lower::pft::Evaluation &eval,
                          const llvm::SmallVectorImpl<mlir::Value> &operands,
                          const llvm::SmallVectorImpl<int32_t> &operandSegments,
-                         bool outerCombined = false) {
-  llvm::ArrayRef<mlir::Type> argTy;
-  Op op = builder.create<Op>(loc, argTy, operands);
-  builder.createBlock(&op.getRegion());
+                         bool outerCombined = false,
+                         mlir::TypeRange argsTy = {},
+                         llvm::SmallVector<mlir::Location> locs = {}) {
+  llvm::ArrayRef<mlir::Type> retTy;
+  Op op = builder.create<Op>(loc, retTy, operands);
+  builder.createBlock(&op.getRegion(), op.getRegion().end(), argsTy, locs);
   mlir::Block &block = op.getRegion().back();
   builder.setInsertionPointToStart(&block);
 
@@ -1488,12 +1491,22 @@ static void genWaitClause(Fortran::lower::AbstractConverter &converter,
   }
 }
 
+mlir::Type getTypeFromIvTypeSize(fir::FirOpBuilder &builder,
+                                 const Fortran::semantics::Symbol &ivSym) {
+  std::size_t ivTypeSize = ivSym.size();
+  if (ivTypeSize == 0)
+    llvm::report_fatal_error("unexpected induction variable size");
+  // ivTypeSize is in bytes and IntegerType needs to be in bits.
+  return builder.getIntegerType(ivTypeSize * 8);
+}
+
 static mlir::acc::LoopOp
 createLoopOp(Fortran::lower::AbstractConverter &converter,
              mlir::Location currentLocation,
-             Fortran::lower::pft::Evaluation &eval,
              Fortran::semantics::SemanticsContext &semanticsContext,
              Fortran::lower::StatementContext &stmtCtx,
+             const Fortran::parser::DoConstruct &outerDoConstruct,
+             Fortran::lower::pft::Evaluation &eval,
              const Fortran::parser::AccClauseList &accClauseList) {
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
 
@@ -1502,11 +1515,73 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
   mlir::Value gangNum;
   mlir::Value gangDim;
   mlir::Value gangStatic;
-  llvm::SmallVector<mlir::Value> tileOperands, privateOperands,
-      reductionOperands, cacheOperands;
+  llvm::SmallVector<mlir::Value> tileOperands, privateOperands, ivPrivate,
+      reductionOperands, cacheOperands, lowerbounds, upperbounds, steps;
   llvm::SmallVector<mlir::Attribute> privatizations, reductionRecipes;
+
   bool hasGang = false, hasVector = false, hasWorker = false;
 
+  llvm::SmallVector<mlir::Type> ivTypes;
+  llvm::SmallVector<mlir::Location> ivLocs;
+  llvm::SmallVector<bool> inclusiveBounds;
+
+  if (outerDoConstruct.IsDoConcurrent())
+    TODO(currentLocation, "OpenACC loop with DO CONCURRENT");
+
+  int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
+  Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
+  for (unsigned i = 0; i < collapseValue; ++i) {
+    const Fortran::parser::LoopControl *loopControl;
+    if (i == 0) {
+      loopControl = &*outerDoConstruct.GetLoopControl();
+    } else {
+      auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
+      assert(doCons && "expect do construct");
+      loopControl = &*doCons->GetLoopControl();
+    }
+
+    const Fortran::parser::LoopControl::Bounds *bounds =
+        std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
+    assert(bounds && "Expected bounds on the loop construct");
+    lowerbounds.push_back(fir::getBase(converter.genExprValue(
+        *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
+    upperbounds.push_back(fir::getBase(converter.genExprValue(
+        *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
+    if (bounds->step)
+      steps.push_back(fir::getBase(converter.genExprValue(
+          *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
+    else // If `step` is not present, assume it as `1`.
+      steps.push_back(builder.createIntegerConstant(
+          currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
+
+    Fortran::semantics::Symbol &ivSym =
+        bounds->name.thing.symbol->GetUltimate();
+
+    mlir::Type ivTy = getTypeFromIvTypeSize(builder, ivSym);
+    mlir::Value ivValue = converter.getSymbolAddress(ivSym);
+    ivTypes.push_back(ivTy);
+    ivLocs.push_back(currentLocation);
+    std::string recipeName =
+        fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
+                             Fortran::lower::privatizationRecipePrefix);
+    auto recipe = Fortran::lower::createOrGetPrivateRecipe(
+        builder, recipeName, currentLocation, ivValue.getType());
+    std::stringstream asFortran;
+    auto op = createDataEntryOp<mlir::acc::PrivateOp>(
+        builder, currentLocation, ivValue, asFortran, {}, true,
+        /*implicit=*/true, mlir::acc::DataClause::acc_private,
+        ivValue.getType());
+
+    privateOperands.push_back(op.getAccPtr());
+    ivPrivate.push_back(op.getAccPtr());
+    privatizations.push_back(mlir::SymbolRefAttr::get(
+        builder.getContext(), recipe.getSymName().str()));
+    inclusiveBounds.push_back(true);
+    converter.bindSymbol(ivSym, op.getAccPtr());
+    if (i < collapseValue - 1)
+      crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
+  }
+
   for (const Fortran::parser::AccClause &clause : accClauseList.v) {
     mlir::Location clauseLocation = converter.genLocation(clause.source);
     if (const auto *gangClause =
@@ -1589,6 +1664,9 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
   // Prepare the operand segment size attribute and the operands value range.
   llvm::SmallVector<mlir::Value> operands;
   llvm::SmallVector<int32_t> operandSegments;
+  addOperands(operands, operandSegments, lowerbounds);
+  addOperands(operands, operandSegments, upperbounds);
+  addOperands(operands, operandSegments, steps);
   addOperand(operands, operandSegments, gangNum);
   addOperand(operands, operandSegments, gangDim);
   addOperand(operands, operandSegments, gangStatic);
@@ -1600,7 +1678,14 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
   addOperands(operands, operandSegments, reductionOperands);
 
   auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
-      builder, currentLocation, eval, operands, operandSegments);
+      builder, currentLocation, eval, operands, operandSegments,
+      /*outerCombined=*/false, ivTypes, ivLocs);
+
+  for (auto [arg, value] : llvm::zip(
+           loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate))
+    builder.create<fir::StoreOp>(currentLocation, arg, value);
+
+  loopOp.setInclusiveUpperbound(inclusiveBounds);
 
   if (hasGang)
     loopOp.setHasGangAttr(builder.getUnitAttr());
@@ -1661,12 +1746,16 @@ static void genACC(Fortran::lower::AbstractConverter &converter,
       converter.genLocation(beginLoopDirective.source);
   Fortran::lower::StatementContext stmtCtx;
 
-  if (loopDirective.v == llvm::acc::ACCD_loop) {
-    const auto &accClauseList =
-        std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
-    createLoopOp(converter, currentLocation, eval, semanticsContext, stmtCtx,
-                 accClauseList);
-  }
+  assert(loopDirective.v == llvm::acc::ACCD_loop &&
+         "Unsupported OpenACC loop construct");
+
+  const auto &accClauseList =
+      std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
+  const auto &outerDoConstruct =
+      std::get<std::optional<Fortran::parser::DoConstruct>>(loopConstruct.t);
+
+  createLoopOp(converter, currentLocation, semanticsContext, stmtCtx,
+               *outerDoConstruct, eval, accClauseList);
 }
 
 template <typename Op, typename Clause>
@@ -2242,6 +2331,9 @@ genACC(Fortran::lower::AbstractConverter &converter,
       std::get<Fortran::parser::AccCombinedDirective>(beginCombinedDirective.t);
   const auto &accClauseList =
       std::get<Fortran::parser::AccClauseList>(beginCombinedDirective.t);
+  const auto &outerDoConstruct =
+      std::get<std::optional<Fortran::parser::DoConstruct>>(
+          combinedConstruct.t);
 
   mlir::Location currentLocation =
       converter.genLocation(beginCombinedDirective.source);
@@ -2251,20 +2343,20 @@ genACC(Fortran::lower::AbstractConverter &converter,
     createComputeOp<mlir::acc::KernelsOp>(
         converter, currentLocation, eval, semanticsContext, stmtCtx,
         accClauseList, /*outerCombined=*/true);
-    createLoopOp(converter, currentLocation, eval, semanticsContext, stmtCtx,
-                 accClauseList);
+    createLoopOp(converter, currentLocation, semanticsContext, stmtCtx,
+                 *outerDoConstruct, eval, accClauseList);
   } else if (combinedDirective.v == llvm::acc::ACCD_parallel_loop) {
     createComputeOp<mlir::acc::ParallelOp>(
         converter, currentLocation, eval, semanticsContext, stmtCtx,
         accClauseList, /*outerCombined=*/true);
-    createLoopOp(converter, currentLocation, eval, semanticsContext, stmtCtx,
-                 accClauseList);
+    createLoopOp(converter, currentLocation, semanticsContext, stmtCtx,
+                 *outerDoConstruct, eval, accClauseList);
   } else if (combinedDirective.v == llvm::acc::ACCD_serial_loop) {
     createComputeOp<mlir::acc::SerialOp>(converter, currentLocation, eval,
                                          semanticsContext, stmtCtx,
                                          accClauseList, /*outerCombined=*/true);
-    createLoopOp(converter, currentLocation, eval, semanticsContext, stmtCtx,
-                 accClauseList);
+    createLoopOp(converter, currentLocation, semanticsContext, stmtCtx,
+                 *outerDoConstruct, eval, accClauseList);
   } else {
     llvm::report_fatal_error("Unknown combined construct encountered");
   }
@@ -3560,3 +3652,16 @@ void Fortran::lower::genOpenACCTerminator(fir::FirOpBuilder &builder,
   else
     builder.create<mlir::acc::TerminatorOp>(loc);
 }
+
+int64_t Fortran::lower::getCollapseValue(
+    const Fortran::parser::AccClauseList &clauseList) {
+  for (const Fortran::parser::AccClause &clause : clauseList.v) {
+    if (const auto *collapseClause =
+            std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
+      const parser::AccCollapseArg &arg = collapseClause->v;
+      const auto &collapseValue{std::get<parser::ScalarIntConstantExpr>(arg.t)};
+      return *Fortran::semantics::GetIntValue(collapseValue);
+    }
+  }
+  return 1;
+}
diff --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
index 34e723269724173..2469f251df970f3 100644
--- a/flang/test/Lower/OpenACC/acc-kernels-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
@@ -43,8 +43,7 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -57,8 +56,7 @@ subroutine acc_kernels_loop
   !$acc end kernels loop
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -71,8 +69,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[ASYNC1:%.*]] = arith.constant 1 : i32
 ! CHECK:      acc.kernels async([[ASYNC1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -85,8 +82,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.kernels async([[ASYNC2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -98,8 +94,7 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -112,8 +107,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[WAIT1:%.*]] = arith.constant 1 : i32
 ! CHECK:      acc.kernels wait([[WAIT1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -127,8 +121,7 @@ subroutine acc_kernels_loop
 ! CHECK:      [[WAIT2:%.*]] = arith.constant 1 : i32
 ! CHECK:      [[WAIT3:%.*]] = arith.constant 2 : i32
 ! CHECK:      acc.kernels wait([[WAIT2]], [[WAIT3]] : i32, i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -142,8 +135,7 @@ subroutine acc_kernels_loop
 ! CHECK:      [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.kernels wait([[WAIT4]], [[WAIT5]] : i32, i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -156,8 +148,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[NUMGANGS1:%.*]] = arith.constant 1 : i32
 ! CHECK:      acc.kernels num_gangs([[NUMGANGS1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -170,8 +161,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.kernels num_gangs([[NUMGANGS2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -184,8 +174,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
 ! CHECK:      acc.kernels num_workers([[NUMWORKERS1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -198,8 +187,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.kernels num_workers([[NUMWORKERS2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -212,8 +200,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
 ! CHECK:      acc.kernels vector_length([[VECTORLENGTH1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -226,8 +213,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.kernels vector_length([[VECTORLENGTH2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -240,8 +226,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[IF1:%.*]] = arith.constant true
 ! CHECK:      acc.kernels if([[IF1]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -255,8 +240,7 @@ subroutine acc_kernels_loop
 ! CHECK:      [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
 ! CHECK:      [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
 ! CHECK:      acc.kernels if([[IF2]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -269,8 +253,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      [[SELF1:%.*]] = arith.constant true
 ! CHECK:      acc.kernels self([[SELF1]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -282,8 +265,7 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -297,8 +279,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      %[[SELF2:.*]] = fir.convert %[[DECLIFCONDITION]]#1 : (!fir.ref<!fir.logical<4>>) -> i1
 ! CHECK:      acc.kernels self(%[[SELF2]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -313,8 +294,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "b"}
 ! CHECK:      acc.kernels dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -330,8 +310,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "b"}
 ! CHECK:      acc.kernels dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -347,8 +326,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyin_readonly>, name = "b"}
 ! CHECK:      acc.kernels dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -362,8 +340,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "a"}
 ! CHECK:      %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "b"}
 ! CHECK:      acc.kernels dataOperands(%[[CREATE_A]], %[[CREATE_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -379,8 +356,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_create_zero>, name = "a"}
 ! CHECK:      acc.kernels dataOperands(%[[CREATE_B]], %[[CREATE_A]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -396,8 +372,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[NOCREATE_A:.*]] = acc.nocreate varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[NOCREATE_B:.*]] = acc.nocreate varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.kernels dataOperands(%[[NOCREATE_A]], %[[NOCREATE_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -411,8 +386,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[PRESENT_A:.*]] = acc.present varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[PRESENT_B:.*]] = acc.present varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.kernels dataOperands(%[[PRESENT_A]], %[[PRESENT_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -426,8 +400,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[DEVICEPTR_A:.*]] = acc.deviceptr varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[DEVICEPTR_B:.*]] = acc.deviceptr varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.kernels dataOperands(%[[DEVICEPTR_A]], %[[DEVICEPTR_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -445,8 +418,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[BOX_ADDR_G:.*]] = fir.box_addr %[[BOX_G]] : (!fir.box<!fir.ptr<f32>>) -> !fir.ptr<f32>
 ! CHECK:      %[[ATTACH_G:.*]] = acc.attach varPtr(%[[BOX_ADDR_G]] : !fir.ptr<f32>) -> !fir.ptr<f32> {name = "g"}
 ! CHECK:      acc.kernels dataOperands(%[[ATTACH_F]], %[[ATTACH_G]] : !fir.ptr<f32>, !fir.ptr<f32>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -458,10 +430,9 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {seq}
+! CHECK-NEXT:   } attributes {inclusiveUpperbound = array<i1: true>, seq}
 ! CHECK:        acc.terminator
 ! CHECK-NEXT: }{{$}}
 
@@ -471,10 +442,9 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {auto}
+! CHECK-NEXT:   } attributes {auto, inclusiveUpperbound = array<i1: true>}
 ! CHECK:        acc.terminator
 ! CHECK-NEXT: }{{$}}
 
@@ -484,10 +454,9 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {independent}
+! CHECK-NEXT:   } attributes {inclusiveUpperbound = array<i1: true>, independent}
 ! CHECK:        acc.terminator
 ! CHECK-NEXT: }{{$}}
 
@@ -497,8 +466,7 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop gang {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop gang() {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -511,8 +479,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      acc.kernels {
 ! CHECK:        [[GANGNUM1:%.*]] = arith.constant 8 : i32
-! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM1]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM1]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -525,8 +492,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      acc.kernels {
 ! CHECK:        [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM2]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM2]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -539,7 +505,6 @@ subroutine acc_kernels_loop
 
 ! CHECK:      acc.kernels {
 ! CHECK:        acc.loop gang(num=%{{.*}} : i32, static=%{{.*}} : i32) {
-! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -550,8 +515,7 @@ subroutine acc_kernels_loop
     a(i) = b(i)
   END DO
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop vector {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop vector() {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -564,8 +528,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      acc.kernels {
 ! CHECK:        [[CONSTANT128:%.*]] = arith.constant 128 : i32
-! CHECK:        acc.loop vector([[CONSTANT128]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop vector([[CONSTANT128]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -578,8 +541,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      acc.kernels {
 ! CHECK:        [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK:        acc.loop vector([[VECTORLENGTH]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop vector([[VECTORLENGTH]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -591,8 +553,7 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop worker {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop worker() {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -605,8 +566,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      acc.kernels {
 ! CHECK:        [[WORKER128:%.*]] = arith.constant 128 : i32
-! CHECK:        acc.loop worker([[WORKER128]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop worker([[WORKER128]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -620,11 +580,9 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
-! CHECK:            fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {collapse = 2 : i64}
+! CHECK-NEXT:   } attributes {collapse = 2 : i64, inclusiveUpperbound = array<i1: true, true>} 
 ! CHECK:        acc.terminator
 ! CHECK-NEXT: }{{$}}
 
@@ -637,10 +595,8 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
-! CHECK:            acc.loop {
-! CHECK:              fir.do_loop
+! CHECK:        acc.loop {{.*}} {
+! CHECK:            acc.loop {{.*}} {
 ! CHECK:              acc.yield
 ! CHECK-NEXT:     }{{$}}
 ! CHECK:          acc.yield
@@ -655,8 +611,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      acc.kernels {
 ! CHECK:        [[TILESIZE:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop tile([[TILESIZE]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile([[TILESIZE]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -669,8 +624,7 @@ subroutine acc_kernels_loop
 
 ! CHECK:      acc.kernels {
 ! CHECK:        [[TILESIZEM1:%.*]] = arith.constant -1 : i32
-! CHECK:        acc.loop tile([[TILESIZEM1]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile([[TILESIZEM1]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -686,8 +640,7 @@ subroutine acc_kernels_loop
 ! CHECK:      acc.kernels {
 ! CHECK:        [[TILESIZE1:%.*]] = arith.constant 2 : i32
 ! CHECK:        [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop tile([[TILESIZE1]], [[TILESIZE2]] : i32, i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile([[TILESIZE1]], [[TILESIZE2]] : i32, i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -699,8 +652,7 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop tile(%{{.*}} : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile(%{{.*}} : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -714,8 +666,7 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop tile(%{{.*}}, %{{.*}} : i32, i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile(%{{.*}}, %{{.*}} : i32, i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -730,8 +681,7 @@ subroutine acc_kernels_loop
 ! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
 ! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
 ! CHECK:      acc.kernels dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
-! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index 924574512da4c73..224f7f983bccf87 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -28,185 +28,168 @@ program acc_loop
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}{{$}}
 
  !$acc loop seq
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: } attributes {seq}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>, seq}
 
   !$acc loop auto
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: } attributes {auto}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {auto, inclusiveUpperbound = array<i1: true>}
 
   !$acc loop independent
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: } attributes {independent}
+! CHECK:      acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>, independent}
 
   !$acc loop gang
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop gang {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop gang() private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop gang(num: 8)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      [[GANGNUM1:%.*]] = arith.constant 8 : i32
-!CHECK-NEXT: acc.loop gang(num=[[GANGNUM1]] : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      [[GANGNUM1:%.*]] = arith.constant 8 : i32
+! CHECK-NEXT: acc.loop gang(num=[[GANGNUM1]] : i32) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop gang(num: gangNum)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
-!CHECK-NEXT: acc.loop gang(num=[[GANGNUM2]] : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-NEXT: acc.loop gang(num=[[GANGNUM2]] : i32) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
  !$acc loop gang(num: gangNum, static: gangStatic)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK: acc.loop gang(num=%{{.*}} : i32, static=%{{.*}} : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK: acc.loop gang(num=%{{.*}} : i32, static=%{{.*}} : i32) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop vector
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop vector {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop vector() private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop vector(128)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK: [[CONSTANT128:%.*]] = arith.constant 128 : i32
-!CHECK:      acc.loop vector([[CONSTANT128]] : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK: [[CONSTANT128:%.*]] = arith.constant 128 : i32
+! CHECK:      acc.loop vector([[CONSTANT128]] : i32) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: }{{$}}
 
   !$acc loop vector(vectorLength)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
-!CHECK:      acc.loop vector([[VECTORLENGTH]] : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK:      acc.loop vector([[VECTORLENGTH]] : i32) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
 !$acc loop worker
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop worker {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop worker() private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop worker(128)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK: [[WORKER128:%.*]] = arith.constant 128 : i32
-!CHECK:      acc.loop worker([[WORKER128]] : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK: [[WORKER128:%.*]] = arith.constant 128 : i32
+! CHECK:      acc.loop worker([[WORKER128]] : i32) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop private(c)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop private(@privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop private(c, d)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop private(@privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop private(c) private(d)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop private(@privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>, @privatization_ref_10x10xf32 -> %{{.*}} : !fir.ref<!fir.array<10x10xf32>>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop tile(2)
   DO i = 1, n
     a(i) = b(i)
   END DO
-!CHECK:      [[TILESIZE:%.*]] = arith.constant 2 : i32
-!CHECK:      acc.loop tile([[TILESIZE]] : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+
+! CHECK:      [[TILESIZE:%.*]] = arith.constant 2 : i32
+! CHECK:      acc.loop {{.*}} tile([[TILESIZE]] : i32) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
  !$acc loop tile(*)
   DO i = 1, n
     a(i) = b(i)
   END DO
-!CHECK:      [[TILESIZEM1:%.*]] = arith.constant -1 : i32
-!CHECK:      acc.loop tile([[TILESIZEM1]] : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      [[TILESIZEM1:%.*]] = arith.constant -1 : i32
+! CHECK:      acc.loop {{.*}} tile([[TILESIZEM1]] : i32) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop tile(2, 2)
   DO i = 1, n
@@ -215,22 +198,20 @@ program acc_loop
     END DO
   END DO
 
-!CHECK:      [[TILESIZE1:%.*]] = arith.constant 2 : i32
-!CHECK:      [[TILESIZE2:%.*]] = arith.constant 2 : i32
-!CHECK:      acc.loop tile([[TILESIZE1]], [[TILESIZE2]] : i32, i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      [[TILESIZE1:%.*]] = arith.constant 2 : i32
+! CHECK:      [[TILESIZE2:%.*]] = arith.constant 2 : i32
+! CHECK:      acc.loop {{.*}} tile([[TILESIZE1]], [[TILESIZE2]] : i32, i32) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop tile(tileSize)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      acc.loop tile(%{{.*}} : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop {{.*}} tile(%{{.*}} : i32) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop tile(tileSize, tileSize)
   DO i = 1, n
@@ -239,10 +220,9 @@ program acc_loop
     END DO
   END DO
 
-!CHECK:      acc.loop tile(%{{.*}}, %{{.*}} : i32, i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop {{.*}} tile(%{{.*}}, %{{.*}} : i32, i32) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop collapse(2)
   DO i = 1, n
@@ -251,11 +231,11 @@ program acc_loop
     END DO
   END DO
 
-!CHECK:      acc.loop {
-!CHECK:        fir.do_loop
-!CHECK:          fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: } attributes {collapse = 2 : i64}
+! CHECK:      acc.loop {{.*}} (%arg0 : i32, %arg1 : i32) = (%{{.*}} : i32, i32) to (%{{.*}} : i32, i32) step (%{{.*}} : i32, i32) {
+! CHECK:        fir.store %arg0 to %{{.*}} : !fir.ref<i32>
+! CHECK:        fir.store %arg1 to %{{.*}} : !fir.ref<i32>
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {collapse = 2 : i64, inclusiveUpperbound = array<i1: true, true>}
 
   !$acc loop
   DO i = 1, n
@@ -265,14 +245,12 @@ program acc_loop
     END DO
   END DO
 
-!CHECK:      acc.loop {
-!CHECK:        fir.do_loop
-!CHECK:          acc.loop {
-!CHECK:            fir.do_loop
-!CHECK:            acc.yield
-!CHECK-NEXT:   }{{$}}
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop {{.*}} (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:          acc.loop {{.*}} (%arg1 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:            acc.yield
+! CHECK-NEXT:   } attributes {inclusiveUpperbound = array<i1: true>}
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop reduction(+:reduction_r) reduction(*:reduction_i)
   do i = 1, n
@@ -280,31 +258,27 @@ program acc_loop
     reduction_i = 1
   end do
 
-! CHECK:      acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
-! CHECK:        fir.do_loop
+! CHECK:      acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
 ! CHECK:        acc.yield
-! CHECK-NEXT: }{{$}}
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
  !$acc loop gang(dim: gangDim, static: gangStatic)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK: acc.loop gang(dim=%{{.*}}, static=%{{.*}} : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK: acc.loop gang(dim=%{{.*}}, static=%{{.*}} : i32) {{.*}} (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop gang(dim: 1)
   DO i = 1, n
     a(i) = b(i)
   END DO
 
-!CHECK:      [[GANGDIM1:%.*]] = arith.constant 1 : i32
-!CHECK-NEXT: acc.loop gang(dim=[[GANGDIM1]] : i32) {
-!CHECK:        fir.do_loop
-!CHECK:        acc.yield
-!CHECK-NEXT: }{{$}}
+! CHECK:      acc.loop gang(dim=%{{.*}} : i32) {{.*}} (%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>}
 
   !$acc loop
   DO i = 1, n
@@ -313,12 +287,11 @@ program acc_loop
   END DO
 
 ! CHECK: %[[CACHE:.*]] = acc.cache varPtr(%{{.*}} : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
-! CHECK: acc.loop cache(%[[CACHE]] : !fir.ref<!fir.array<10xf32>>)
+! CHECK: acc.loop {{.*}} cache(%[[CACHE]] : !fir.ref<!fir.array<10xf32>>)
 
   !$acc loop
   do 100 i=0, n
   100 continue
 ! CHECK: acc.loop
-! CHECK: fir.do_loop
 
 end program
diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
index 1856215ce59d134..712e00918169534 100644
--- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
@@ -45,8 +45,7 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -59,8 +58,7 @@ subroutine acc_parallel_loop
   !$acc end parallel loop
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -73,8 +71,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[ASYNC1:%.*]] = arith.constant 1 : i32
 ! CHECK:      acc.parallel async([[ASYNC1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -87,8 +84,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.parallel async([[ASYNC2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -100,8 +96,7 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -114,8 +109,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[WAIT1:%.*]] = arith.constant 1 : i32
 ! CHECK:      acc.parallel wait([[WAIT1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -129,8 +123,7 @@ subroutine acc_parallel_loop
 ! CHECK:      [[WAIT2:%.*]] = arith.constant 1 : i32
 ! CHECK:      [[WAIT3:%.*]] = arith.constant 2 : i32
 ! CHECK:      acc.parallel wait([[WAIT2]], [[WAIT3]] : i32, i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -144,8 +137,7 @@ subroutine acc_parallel_loop
 ! CHECK:      [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.parallel wait([[WAIT4]], [[WAIT5]] : i32, i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -158,8 +150,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[NUMGANGS1:%.*]] = arith.constant 1 : i32
 ! CHECK:      acc.parallel num_gangs([[NUMGANGS1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -172,8 +163,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.parallel num_gangs([[NUMGANGS2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -186,8 +176,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
 ! CHECK:      acc.parallel num_workers([[NUMWORKERS1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -200,8 +189,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.parallel num_workers([[NUMWORKERS2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -214,8 +202,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
 ! CHECK:      acc.parallel vector_length([[VECTORLENGTH1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -228,8 +215,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.parallel vector_length([[VECTORLENGTH2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -242,8 +228,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[IF1:%.*]] = arith.constant true
 ! CHECK:      acc.parallel if([[IF1]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -257,8 +242,7 @@ subroutine acc_parallel_loop
 ! CHECK:      [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
 ! CHECK:      [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
 ! CHECK:      acc.parallel if([[IF2]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -271,8 +255,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      [[SELF1:%.*]] = arith.constant true
 ! CHECK:      acc.parallel self([[SELF1]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -284,8 +267,7 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -298,8 +280,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      %[[SELF2:.*]] = fir.convert %[[DECLIFCONDITION]]#1 : (!fir.ref<!fir.logical<4>>) -> i1
 ! CHECK:      acc.parallel self(%[[SELF2]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -313,8 +294,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "b"}
 ! CHECK:      acc.parallel dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -330,8 +310,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "b"}
 ! CHECK:      acc.parallel dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -347,8 +326,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyin_readonly>, name = "b"}
 ! CHECK:      acc.parallel dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -362,8 +340,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "a"}
 ! CHECK:      %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "b"}
 ! CHECK:      acc.parallel dataOperands(%[[CREATE_A]], %[[CREATE_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -379,8 +356,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_create_zero>, name = "a"}
 ! CHECK:      acc.parallel dataOperands(%[[CREATE_B]], %[[CREATE_A]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -396,8 +372,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[NOCREATE_A:.*]] = acc.nocreate varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[NOCREATE_B:.*]] = acc.nocreate varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.parallel dataOperands(%[[NOCREATE_A]], %[[NOCREATE_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -411,8 +386,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[PRESENT_A:.*]] = acc.present varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[PRESENT_B:.*]] = acc.present varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.parallel dataOperands(%[[PRESENT_A]], %[[PRESENT_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -426,8 +400,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[DEVICEPTR_A:.*]] = acc.deviceptr varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[DEVICEPTR_B:.*]] = acc.deviceptr varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.parallel dataOperands(%[[DEVICEPTR_A]], %[[DEVICEPTR_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -445,8 +418,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[BOX_ADDR_G:.*]] = fir.box_addr %[[BOX_G]] : (!fir.box<!fir.ptr<f32>>) -> !fir.ptr<f32>
 ! CHECK:      %[[ATTACH_G:.*]] = acc.attach varPtr(%[[BOX_ADDR_G]] : !fir.ptr<f32>) -> !fir.ptr<f32> {name = "g"}
 ! CHECK:      acc.parallel dataOperands(%[[ATTACH_F]], %[[ATTACH_G]] : !fir.ptr<f32>, !fir.ptr<f32>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -460,7 +432,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[ACC_PRIVATE_B:.*]] = acc.firstprivate varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.parallel firstprivate(@firstprivatization_section_ext10_ref_10xf32 -> %[[ACC_PRIVATE_B]] : !fir.ref<!fir.array<10xf32>>) {
 ! CHECK:      %[[ACC_PRIVATE_A:.*]] = acc.private varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
-! CHECK:        acc.loop private(@privatization_ref_10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10xf32>>) {
+! CHECK:        acc.loop private({{.*}}@privatization_ref_10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10xf32>>)
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
@@ -473,10 +445,9 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {seq}
+! CHECK-NEXT:   } attributes {inclusiveUpperbound = array<i1: true>, seq}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 
@@ -486,10 +457,9 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {auto}
+! CHECK-NEXT:   } attributes {auto, inclusiveUpperbound = array<i1: true>}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 
@@ -499,10 +469,9 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {independent}
+! CHECK-NEXT:   } attributes {inclusiveUpperbound = array<i1: true>, independent}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 
@@ -512,8 +481,7 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop gang {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop gang() {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -526,8 +494,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      acc.parallel {
 ! CHECK:        [[GANGNUM1:%.*]] = arith.constant 8 : i32
-! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM1]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM1]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -540,8 +507,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      acc.parallel {
 ! CHECK:        [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM2]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM2]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -553,8 +519,7 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop gang(num=%{{.*}} : i32, static=%{{.*}} : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop gang(num=%{{.*}} : i32, static=%{{.*}} : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -565,8 +530,7 @@ subroutine acc_parallel_loop
     a(i) = b(i)
   END DO
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop vector {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop vector() {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -579,8 +543,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      acc.parallel {
 ! CHECK:        [[CONSTANT128:%.*]] = arith.constant 128 : i32
-! CHECK:        acc.loop vector([[CONSTANT128]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop vector([[CONSTANT128]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -593,7 +556,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      acc.parallel {
 ! CHECK:        [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK:        acc.loop vector([[VECTORLENGTH]] : i32) {
+! CHECK:        acc.loop vector([[VECTORLENGTH]] : i32) {{.*}} {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
@@ -606,7 +569,7 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop worker {
+! CHECK:        acc.loop worker() {{.*}} {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
@@ -620,7 +583,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      acc.parallel {
 ! CHECK:        [[WORKER128:%.*]] = arith.constant 128 : i32
-! CHECK:        acc.loop worker([[WORKER128]] : i32) {
+! CHECK:        acc.loop worker([[WORKER128]] : i32) {{.*}} {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
@@ -635,11 +598,9 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
-! CHECK:            fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {collapse = 2 : i64}
+! CHECK-NEXT:   } attributes {collapse = 2 : i64, inclusiveUpperbound = array<i1: true, true>}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 
@@ -652,10 +613,8 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
-! CHECK:            acc.loop {
-! CHECK:              fir.do_loop
+! CHECK:        acc.loop {{.*}} {
+! CHECK:            acc.loop {{.*}} {
 ! CHECK:              acc.yield
 ! CHECK-NEXT:     }{{$}}
 ! CHECK:          acc.yield
@@ -670,22 +629,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      acc.parallel {
 ! CHECK:        [[TILESIZE:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop tile([[TILESIZE]] : i32) {
-! CHECK:          fir.do_loop
-! CHECK:          acc.yield
-! CHECK-NEXT:   }{{$}}
-! CHECK:        acc.yield
-! CHECK-NEXT: }{{$}}
-
- !$acc parallel loop tile(*)
-  DO i = 1, n
-    a(i) = b(i)
-  END DO
-
-! CHECK:      acc.parallel {
-! CHECK:        [[TILESIZEM1:%.*]] = arith.constant -1 : i32
-! CHECK:        acc.loop tile([[TILESIZEM1]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile([[TILESIZE]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -701,8 +645,7 @@ subroutine acc_parallel_loop
 ! CHECK:      acc.parallel {
 ! CHECK:        [[TILESIZE1:%.*]] = arith.constant 2 : i32
 ! CHECK:        [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop tile([[TILESIZE1]], [[TILESIZE2]] : i32, i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile([[TILESIZE1]], [[TILESIZE2]] : i32, i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -714,8 +657,7 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop tile(%{{.*}} : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile(%{{.*}} : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -729,8 +671,20 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {
-! CHECK:        acc.loop tile(%{{.*}}, %{{.*}} : i32, i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile(%{{.*}}, %{{.*}} : i32, i32) {{.*}} {
+! CHECK:          acc.yield
+! CHECK-NEXT:   }{{$}}
+! CHECK:        acc.yield
+! CHECK-NEXT: }{{$}}
+
+ !$acc parallel loop tile(*)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+! CHECK:      acc.parallel {
+! CHECK:        [[TILESIZEM1:%.*]] = arith.constant -1 : i32
+! CHECK:        acc.loop {{.*}} tile([[TILESIZEM1]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -745,8 +699,7 @@ subroutine acc_parallel_loop
 ! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
 ! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
 ! CHECK:      acc.parallel dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
-! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {{.*}}
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -754,6 +707,7 @@ subroutine acc_parallel_loop
 ! CHECK:      acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
 ! CHECK:      acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
 
+
   !$acc parallel loop
   do 10 i=0, n
   10 continue
diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90
index 0caaefc82225424..69820b1cde49619 100644
--- a/flang/test/Lower/OpenACC/acc-private.f90
+++ b/flang/test/Lower/OpenACC/acc-private.f90
@@ -189,7 +189,7 @@ program acc_private
   END DO
 
 ! CHECK: %[[C_PRIVATE:.*]] = acc.private varPtr(%[[DECLC]]#1 : !fir.ref<i32>) -> !fir.ref<i32> {name = "c"}
-! CHECK: acc.loop private(@privatization_ref_i32 -> %[[C_PRIVATE]] : !fir.ref<i32>)
+! CHECK: acc.loop private({{.*}}@privatization_ref_i32 -> %[[C_PRIVATE]] : !fir.ref<i32>)
 ! CHECK: acc.yield
 
   !$acc loop private(b)
@@ -203,7 +203,7 @@ program acc_private
 ! CHECK: %[[UB:.*]] = arith.subi %{{.*}}, %[[C1]] : index
 ! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) extent(%{{.*}} : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
 ! CHECK: %[[B_PRIVATE:.*]] = acc.private varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<100xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xf32>> {name = "b"}
-! CHECK: acc.loop private(@privatization_ref_100xf32 -> %[[B_PRIVATE]] : !fir.ref<!fir.array<100xf32>>) {
+! CHECK: acc.loop private({{.*}}@privatization_ref_100xf32 -> %[[B_PRIVATE]] : !fir.ref<!fir.array<100xf32>>)
 ! CHECK: acc.yield
 
   !$acc loop private(b(1:50))
@@ -217,7 +217,7 @@ program acc_private
 ! CHECK: %[[UB:.*]] = arith.constant 49 : index
 ! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
 ! CHECK: %[[B_PRIVATE:.*]] = acc.private varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<100xf32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<50xf32>> {name = "b(1:50)"}
-! CHECK: acc.loop private(@privatization_ref_50xf32 -> %[[B_PRIVATE]] : !fir.ref<!fir.array<50xf32>>)
+! CHECK: acc.loop private({{.*}}@privatization_ref_50xf32 -> %[[B_PRIVATE]] : !fir.ref<!fir.array<50xf32>>)
 
   !$acc parallel loop firstprivate(c)
   DO i = 1, n
@@ -273,7 +273,7 @@ subroutine acc_private_assumed_shape(a, n)
 ! CHECK: acc.parallel {
 ! CHECK: %[[ADDR:.*]] = fir.box_addr %[[DECL_A]]#1 : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
 ! CHECK: %[[PRIVATE:.*]] = acc.private varPtr(%[[ADDR]] : !fir.ref<!fir.array<?xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "a"}
-! CHECK: acc.loop private(@privatization_box_Uxi32 -> %[[PRIVATE]] : !fir.ref<!fir.array<?xi32>>) {
+! CHECK: acc.loop private({{.*}}@privatization_box_Uxi32 -> %[[PRIVATE]] : !fir.ref<!fir.array<?xi32>>)
 
 subroutine acc_private_allocatable_array(a, n)
   integer, allocatable :: a(:)
@@ -296,7 +296,7 @@ subroutine acc_private_allocatable_array(a, n)
 ! CHECK: %[[BOX:.*]] = fir.load %[[DECLA_A]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
 ! CHECK: %[[PRIVATE:.*]] = acc.private varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) bounds(%{{.*}}) -> !fir.heap<!fir.array<?xi32>> {name = "a"}
-! CHECK: acc.loop private(@privatization_box_heap_Uxi32 -> %[[PRIVATE]] : !fir.heap<!fir.array<?xi32>>)
+! CHECK: acc.loop private({{.*}}@privatization_box_heap_Uxi32 -> %[[PRIVATE]] : !fir.heap<!fir.array<?xi32>>)
 ! CHECK: acc.serial private(@privatization_box_heap_Uxi32 -> %{{.*}} : !fir.heap<!fir.array<?xi32>>)
 
 subroutine acc_private_pointer_array(a, n)
@@ -316,7 +316,7 @@ subroutine acc_private_pointer_array(a, n)
 ! CHECK: %[[BOX:.*]] = fir.load %[[DECLA_A]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
 ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>>
 ! CHECK: %[[PRIVATE:.*]] = acc.private varPtr(%[[BOX_ADDR]] : !fir.ptr<!fir.array<?xi32>>) bounds(%{{.*}}) -> !fir.ptr<!fir.array<?xi32>> {name = "a"}
-! CHECK: acc.loop private(@privatization_box_ptr_Uxi32 -> %[[PRIVATE]] : !fir.ptr<!fir.array<?xi32>>)
+! CHECK: acc.loop private({{.*}}@privatization_box_ptr_Uxi32 -> %[[PRIVATE]] : !fir.ptr<!fir.array<?xi32>>)
 
 subroutine acc_private_dynamic_extent(a, n)
   integer :: n, i
@@ -334,7 +334,7 @@ subroutine acc_private_dynamic_extent(a, n)
 ! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ARG0]](%16) {uniq_name = "_QFacc_private_dynamic_extentEa"} : (!fir.ref<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x2xi32>>, !fir.ref<!fir.array<?x?x2xi32>>)
 ! CHECK: acc.parallel {
 ! CHECK: %[[PRIV:.*]] = acc.private varPtr(%[[DECL_A]]#1 : !fir.ref<!fir.array<?x?x2xi32>>) bounds(%{{.*}}, %{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<?x?x2xi32>> {name = "a"}
-! CHECK: acc.loop private(@privatization_ref_UxUx2xi32 -> %[[PRIV]] : !fir.ref<!fir.array<?x?x2xi32>>)
+! CHECK: acc.loop private({{.*}}@privatization_ref_UxUx2xi32 -> %[[PRIV]] : !fir.ref<!fir.array<?x?x2xi32>>)
 
 subroutine acc_firstprivate_assumed_shape(a, n)
   integer :: a(:), i, n
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 98fef6a2d2f29a3..1b61c055001ce73 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -706,7 +706,7 @@ subroutine acc_reduction_add_int(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_add_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_add_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
 subroutine acc_reduction_add_int_array_1d(a, b)
   integer :: a(100)
@@ -723,7 +723,7 @@ subroutine acc_reduction_add_int_array_1d(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_add_section_ext100_ref_100xi32 -> %[[RED_B]] : !fir.ref<!fir.array<100xi32>>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_add_section_ext100_ref_100xi32 -> %[[RED_B]] : !fir.ref<!fir.array<100xi32>>)
 
 subroutine acc_reduction_add_int_array_2d(a, b)
   integer :: a(100, 10), b(100, 10)
@@ -742,8 +742,7 @@ subroutine acc_reduction_add_int_array_2d(a, b)
 ! HLFIR:       %[[DECLARG1:.*]]:2 = hlfir.declare %[[ARG1]]
 ! FIR:         %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100x10xi32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10xi32>> {name = "b"}
 ! HLFIR:       %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[DECLARG1]]#1 : !fir.ref<!fir.array<100x10xi32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10xi32>> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_add_section_ext100xext10_ref_100x10xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10xi32>>) {
-! CHECK: } attributes {collapse = 2 : i64}
+! CHECK:       acc.loop {{.*}} reduction(@reduction_add_section_ext100xext10_ref_100x10xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10xi32>>)
 
 subroutine acc_reduction_add_int_array_3d(a, b)
   integer :: a(100, 10, 2), b(100, 10, 2)
@@ -764,8 +763,7 @@ subroutine acc_reduction_add_int_array_3d(a, b)
 ! HLFIR: %[[DECLARG1:.*]]:2 = hlfir.declare %[[ARG1]]
 ! FIR:   %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100x10x2xi32>>) bounds(%{{.*}}, %{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10x2xi32>> {name = "b"}
 ! HLFIR: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[DECLARG1]]#1 : !fir.ref<!fir.array<100x10x2xi32>>) bounds(%{{.*}}, %{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10x2xi32>> {name = "b"}
-! CHECK: acc.loop reduction(@reduction_add_section_ext100xext10xext2_ref_100x10x2xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10x2xi32>>)
-! CHECK: } attributes {collapse = 3 : i64}
+! CHECK: acc.loop {{.*}} reduction(@reduction_add_section_ext100xext10xext2_ref_100x10x2xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10x2xi32>>)
 
 subroutine acc_reduction_add_float(a, b)
   real :: a(100), b
@@ -782,7 +780,7 @@ subroutine acc_reduction_add_float(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_add_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_add_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
 subroutine acc_reduction_add_float_array_1d(a, b)
   real :: a(100), b(100)
@@ -799,7 +797,7 @@ subroutine acc_reduction_add_float_array_1d(a, b)
 ! HLFIR: %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:   %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "b"}
 ! HLFIR: %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_add_section_ext100_ref_100xf32 -> %[[RED_B]] : !fir.ref<!fir.array<100xf32>>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_add_section_ext100_ref_100xf32 -> %[[RED_B]] : !fir.ref<!fir.array<100xf32>>)
 
 subroutine acc_reduction_mul_int(a, b)
   integer :: a(100)
@@ -816,7 +814,7 @@ subroutine acc_reduction_mul_int(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_mul_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_mul_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
 subroutine acc_reduction_mul_int_array_1d(a, b)
   integer :: a(100)
@@ -833,7 +831,7 @@ subroutine acc_reduction_mul_int_array_1d(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_mul_section_ext100_ref_100xi32 -> %[[RED_B]] : !fir.ref<!fir.array<100xi32>>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_mul_section_ext100_ref_100xi32 -> %[[RED_B]] : !fir.ref<!fir.array<100xi32>>)
 
 subroutine acc_reduction_mul_float(a, b)
   real :: a(100), b
@@ -850,7 +848,7 @@ subroutine acc_reduction_mul_float(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_mul_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_mul_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
 subroutine acc_reduction_mul_float_array_1d(a, b)
   real :: a(100), b(100)
@@ -865,9 +863,9 @@ subroutine acc_reduction_mul_float_array_1d(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_mul_float_array_1d(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "b"})
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
-! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<!fir.array<100xf32>>) bounds(%2) -> !fir.ref<!fir.array<100xf32>> {name = "b"}
+! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_mul_section_ext100_ref_100xf32 -> %[[RED_B]] : !fir.ref<!fir.array<100xf32>>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_mul_section_ext100_ref_100xf32 -> %[[RED_B]] : !fir.ref<!fir.array<100xf32>>)
 
 subroutine acc_reduction_min_int(a, b)
   integer :: a(100)
@@ -884,7 +882,7 @@ subroutine acc_reduction_min_int(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_min_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_min_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
 subroutine acc_reduction_min_int_array_1d(a, b)
   integer :: a(100), b(100)
@@ -899,9 +897,9 @@ subroutine acc_reduction_min_int_array_1d(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_min_int_array_1d(
 ! CHECK-SAME: %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "b"})
 ! HLFIR: %[[DECLARG1:.*]]:2 = hlfir.declare %[[ARG1]]
-! FIR:   %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100xi32>>) bounds(%2) -> !fir.ref<!fir.array<100xi32>> {name = "b"}
+! FIR:   %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {name = "b"}
 ! HLFIR: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[DECLARG1]]#1 : !fir.ref<!fir.array<100xi32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xi32>> {name = "b"}
-! CHECK: acc.loop reduction(@reduction_min_section_ext100_ref_100xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100xi32>>)
+! CHECK: acc.loop {{.*}} reduction(@reduction_min_section_ext100_ref_100xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100xi32>>)
 
 subroutine acc_reduction_min_float(a, b)
   real :: a(100), b
@@ -918,7 +916,7 @@ subroutine acc_reduction_min_float(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_min_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_min_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
 subroutine acc_reduction_min_float_array2d(a, b)
   real :: a(100, 10), b(100, 10)
@@ -935,10 +933,9 @@ subroutine acc_reduction_min_float_array2d(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_min_float_array2d(
 ! CHECK-SAME: %{{.*}}: !fir.ref<!fir.array<100x10xf32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>> {fir.bindc_name = "b"})
 ! HLFIR: %[[DECLARG1:.*]]:2 = hlfir.declare %[[ARG1]]
-! FIR:   %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100x10xf32>>) bounds(%3, %5) -> !fir.ref<!fir.array<100x10xf32>> {name = "b"}
+! FIR:   %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10xf32>> {name = "b"}
 ! HLFIR: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[DECLARG1]]#1 : !fir.ref<!fir.array<100x10xf32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10xf32>> {name = "b"}
-! CHECK: acc.loop reduction(@reduction_min_section_ext100xext10_ref_100x10xf32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10xf32>>)
-! CHECK: attributes {collapse = 2 : i64}
+! CHECK: acc.loop {{.*}} reduction(@reduction_min_section_ext100xext10_ref_100x10xf32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10xf32>>)
 
 subroutine acc_reduction_max_int(a, b)
   integer :: a(100)
@@ -955,7 +952,7 @@ subroutine acc_reduction_max_int(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_max_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_max_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
 subroutine acc_reduction_max_int_array2d(a, b)
   integer :: a(100, 10), b(100, 10)
@@ -974,7 +971,7 @@ subroutine acc_reduction_max_int_array2d(a, b)
 ! HLFIR: %[[DECLARG1:.*]]:2 = hlfir.declare %[[ARG1]]
 ! FIR:   %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100x10xi32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10xi32>> {name = "b"}
 ! HLFIR: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[DECLARG1]]#1 : !fir.ref<!fir.array<100x10xi32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10xi32>> {name = "b"}
-! CHECK: acc.loop reduction(@reduction_max_section_ext100xext10_ref_100x10xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10xi32>>)
+! CHECK: acc.loop {{.*}} reduction(@reduction_max_section_ext100xext10_ref_100x10xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10xi32>>)
 
 subroutine acc_reduction_max_float(a, b)
   real :: a(100), b
@@ -991,7 +988,7 @@ subroutine acc_reduction_max_float(a, b)
 ! HLFIR:       %[[DECLB:.*]]:2 = hlfir.declare %[[B]]
 ! FIR:         %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
 ! HLFIR:       %[[RED_B:.*]] = acc.reduction varPtr(%[[DECLB]]#1 : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_max_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
+! CHECK:       acc.loop {{.*}} reduction(@reduction_max_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
 subroutine acc_reduction_max_float_array1d(a, b)
   real :: a(100), b(100)
@@ -1008,7 +1005,7 @@ subroutine acc_reduction_max_float_array1d(a, b)
 ! HLFIR:       %[[DECLARG1:.*]]:2 = hlfir.declare %[[ARG1]]
 ! FIR:         %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "b"}
 ! HLFIR:       %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[DECLARG1]]#1 : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_max_section_ext100_ref_100xf32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100xf32>>) {
+! CHECK:       acc.loop {{.*}} reduction(@reduction_max_section_ext100_ref_100xf32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100xf32>>)
 
 subroutine acc_reduction_iand()
   integer :: i
diff --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90
index 4ed7bb8da29a1a0..ca504f36c368353 100644
--- a/flang/test/Lower/OpenACC/acc-serial-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90
@@ -64,8 +64,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -78,8 +77,7 @@ subroutine acc_serial_loop
   !$acc end serial loop
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -92,8 +90,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      [[ASYNC1:%.*]] = arith.constant 1 : i32
 ! CHECK:      acc.serial async([[ASYNC1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -106,8 +103,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.serial async([[ASYNC2]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -119,8 +115,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -133,8 +128,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      [[WAIT1:%.*]] = arith.constant 1 : i32
 ! CHECK:      acc.serial wait([[WAIT1]] : i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -148,8 +142,7 @@ subroutine acc_serial_loop
 ! CHECK:      [[WAIT2:%.*]] = arith.constant 1 : i32
 ! CHECK:      [[WAIT3:%.*]] = arith.constant 2 : i32
 ! CHECK:      acc.serial wait([[WAIT2]], [[WAIT3]] : i32, i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -163,8 +156,7 @@ subroutine acc_serial_loop
 ! CHECK:      [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:      acc.serial wait([[WAIT4]], [[WAIT5]] : i32, i32) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -177,8 +169,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      [[IF1:%.*]] = arith.constant true
 ! CHECK:      acc.serial if([[IF1]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -192,8 +183,7 @@ subroutine acc_serial_loop
 ! CHECK:      [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
 ! CHECK:      [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
 ! CHECK:      acc.serial if([[IF2]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -206,8 +196,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      [[SELF1:%.*]] = arith.constant true
 ! CHECK:      acc.serial self([[SELF1]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -219,8 +208,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -233,8 +221,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      %[[SELF2:.*]] = fir.convert %[[DECLIFCONDITION]]#1 : (!fir.ref<!fir.logical<4>>) -> i1
 ! CHECK:      acc.serial self(%[[SELF2]]) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -248,8 +235,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "b"}
 ! CHECK:      acc.serial dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -265,8 +251,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copy>, name = "b"}
 ! CHECK:      acc.serial dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -282,8 +267,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyin_readonly>, name = "b"}
 ! CHECK:      acc.serial dataOperands(%[[COPYIN_A]], %[[COPYIN_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -297,8 +281,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "a"}
 ! CHECK:      %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "b"}
 ! CHECK:      acc.serial dataOperands(%[[CREATE_A]], %[[CREATE_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -314,8 +297,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[CREATE_B:.*]] = acc.create varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      %[[CREATE_A:.*]] = acc.create varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {dataClause = #acc<data_clause acc_create_zero>, name = "a"}
 ! CHECK:      acc.serial dataOperands(%[[CREATE_B]], %[[CREATE_A]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -331,8 +313,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[NOCREATE_A:.*]] = acc.nocreate varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[NOCREATE_B:.*]] = acc.nocreate varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.serial dataOperands(%[[NOCREATE_A]], %[[NOCREATE_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -346,8 +327,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[PRESENT_A:.*]] = acc.present varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[PRESENT_B:.*]] = acc.present varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.serial dataOperands(%[[PRESENT_A]], %[[PRESENT_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -361,8 +341,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[DEVICEPTR_A:.*]] = acc.deviceptr varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:      %[[DEVICEPTR_B:.*]] = acc.deviceptr varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.serial dataOperands(%[[DEVICEPTR_A]], %[[DEVICEPTR_B]] : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -380,8 +359,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[BOX_ADDR_G:.*]] = fir.box_addr %[[BOX_G]] : (!fir.box<!fir.ptr<f32>>) -> !fir.ptr<f32>
 ! CHECK:      %[[ATTACH_G:.*]] = acc.attach varPtr(%[[BOX_ADDR_G]] : !fir.ptr<f32>) -> !fir.ptr<f32> {name = "g"}
 ! CHECK:      acc.serial dataOperands(%[[ATTACH_F]], %[[ATTACH_G]] : !fir.ptr<f32>, !fir.ptr<f32>) {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -395,7 +373,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[ACC_FPRIVATE_B:.*]] = acc.firstprivate varPtr(%[[DECLB]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
 ! CHECK:      acc.serial firstprivate(@firstprivatization_section_ext10_ref_10xf32 -> %[[ACC_FPRIVATE_B]] : !fir.ref<!fir.array<10xf32>>) {
 ! CHECK:      %[[ACC_PRIVATE_A:.*]] = acc.private varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
-! CHECK:        acc.loop private(@privatization_ref_10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10xf32>>) {
+! CHECK:        acc.loop private({{.*}}@privatization_ref_10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10xf32>>)
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
@@ -408,10 +386,9 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {seq}
+! CHECK-NEXT:   } attributes {inclusiveUpperbound = array<i1: true>, seq}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 
@@ -421,10 +398,9 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {auto}
+! CHECK-NEXT:   } attributes {auto, inclusiveUpperbound = array<i1: true>}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 
@@ -434,10 +410,9 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {independent}
+! CHECK-NEXT:   } attributes {inclusiveUpperbound = array<i1: true>, independent}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 
@@ -447,8 +422,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop gang {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop gang() {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -461,8 +435,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      acc.serial {
 ! CHECK:        [[GANGNUM1:%.*]] = arith.constant 8 : i32
-! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM1]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM1]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -475,8 +448,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      acc.serial {
 ! CHECK:        [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM2]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK-NEXT:   acc.loop gang(num=[[GANGNUM2]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -488,8 +460,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop gang(num=%{{.*}} : i32, static=%{{.*}} : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop gang(num=%{{.*}} : i32, static=%{{.*}} : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -500,8 +471,7 @@ subroutine acc_serial_loop
     a(i) = b(i)
   END DO
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop vector {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop vector() {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -514,8 +484,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      acc.serial {
 ! CHECK:        [[CONSTANT128:%.*]] = arith.constant 128 : i32
-! CHECK:        acc.loop vector([[CONSTANT128]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop vector([[CONSTANT128]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -528,8 +497,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      acc.serial {
 ! CHECK:        [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK:        acc.loop vector([[VECTORLENGTH]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop vector([[VECTORLENGTH]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -541,8 +509,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop worker {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop worker() {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -555,8 +522,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      acc.serial {
 ! CHECK:        [[WORKER128:%.*]] = arith.constant 128 : i32
-! CHECK:        acc.loop worker([[WORKER128]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop worker([[WORKER128]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -570,11 +536,10 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} {
 ! CHECK:            fir.do_loop
 ! CHECK:          acc.yield
-! CHECK-NEXT:   } attributes {collapse = 2 : i64}
+! CHECK-NEXT:   } attributes {collapse = 2 : i64, inclusiveUpperbound = array<i1: true, true>}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 
@@ -587,10 +552,8 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop {
-! CHECK:          fir.do_loop
-! CHECK:            acc.loop {
-! CHECK:              fir.do_loop
+! CHECK:        acc.loop {{.*}} {
+! CHECK:            acc.loop {{.*}} {
 ! CHECK:              acc.yield
 ! CHECK-NEXT:     }{{$}}
 ! CHECK:          acc.yield
@@ -605,8 +568,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      acc.serial {
 ! CHECK:        [[TILESIZE:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop tile([[TILESIZE]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile([[TILESIZE]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -619,8 +581,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      acc.serial {
 ! CHECK:        [[TILESIZEM1:%.*]] = arith.constant -1 : i32
-! CHECK:        acc.loop tile([[TILESIZEM1]] : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile([[TILESIZEM1]] : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -636,8 +597,7 @@ subroutine acc_serial_loop
 ! CHECK:      acc.serial {
 ! CHECK:        [[TILESIZE1:%.*]] = arith.constant 2 : i32
 ! CHECK:        [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop tile([[TILESIZE1]], [[TILESIZE2]] : i32, i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile([[TILESIZE1]], [[TILESIZE2]] : i32, i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -649,8 +609,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop tile(%{{.*}} : i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile(%{{.*}} : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -664,8 +623,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {
-! CHECK:        acc.loop tile(%{{.*}}, %{{.*}} : i32, i32) {
-! CHECK:          fir.do_loop
+! CHECK:        acc.loop {{.*}} tile(%{{.*}}, %{{.*}} : i32, i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -680,7 +638,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
 ! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
 ! CHECK:      acc.serial dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
-! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:        acc.loop {{.*}} reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>)
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}

>From b0fff85a5d6950c8048b3b962caa470acccdb4f1 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 29 Nov 2023 14:09:48 -0800
Subject: [PATCH 3/4] Fix lowering of combined construct

---
 flang/lib/Lower/Bridge.cpp                    | 32 +++++++++++++------
 .../test/Lower/OpenACC/acc-parallel-loop.f90  | 10 +++---
 flang/test/Lower/OpenACC/acc-serial-loop.f90  |  6 ++--
 3 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index fcb5f275c8ffa02..d7e4c40ae454a53 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2386,18 +2386,32 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
     const Fortran::parser::OpenACCLoopConstruct *accLoop =
         std::get_if<Fortran::parser::OpenACCLoopConstruct>(&acc.u);
+    const Fortran::parser::OpenACCCombinedConstruct *accCombined =
+        std::get_if<Fortran::parser::OpenACCCombinedConstruct>(&acc.u);
 
     Fortran::lower::pft::Evaluation *curEval = &getEval();
 
-    if (accLoop) {
-      const Fortran::parser::AccBeginLoopDirective &beginLoopDir =
-          std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t);
-      const Fortran::parser::AccClauseList &clauseList =
-          std::get<Fortran::parser::AccClauseList>(beginLoopDir.t);
-      int64_t collapseValue = Fortran::lower::getCollapseValue(clauseList);
-      curEval = &curEval->getFirstNestedEvaluation();
-      for (int64_t i = 1; i < collapseValue; i++)
-        curEval = &*std::next(curEval->getNestedEvaluations().begin());
+    if (accLoop || accCombined) {
+      int64_t collapseValue;
+      if (accLoop) {
+        const Fortran::parser::AccBeginLoopDirective &beginLoopDir =
+            std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t);
+        const Fortran::parser::AccClauseList &clauseList =
+            std::get<Fortran::parser::AccClauseList>(beginLoopDir.t);
+        collapseValue = Fortran::lower::getCollapseValue(clauseList);
+      } else if (accCombined) {
+        const Fortran::parser::AccBeginCombinedDirective &beginCombinedDir =
+            std::get<Fortran::parser::AccBeginCombinedDirective>(accCombined->t);
+        const Fortran::parser::AccClauseList &clauseList =
+            std::get<Fortran::parser::AccClauseList>(beginCombinedDir.t);
+        collapseValue = Fortran::lower::getCollapseValue(clauseList);
+      }
+
+      if (curEval->lowerAsStructured()) {
+        curEval = &curEval->getFirstNestedEvaluation();
+        for (int64_t i = 1; i < collapseValue; i++)
+          curEval = &*std::next(curEval->getNestedEvaluations().begin());
+      }
     }
 
     for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations())
diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
index 712e00918169534..ef9a74260a07a6b 100644
--- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
@@ -433,7 +433,7 @@ subroutine acc_parallel_loop
 ! CHECK:      acc.parallel firstprivate(@firstprivatization_section_ext10_ref_10xf32 -> %[[ACC_PRIVATE_B]] : !fir.ref<!fir.array<10xf32>>) {
 ! CHECK:      %[[ACC_PRIVATE_A:.*]] = acc.private varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:        acc.loop private({{.*}}@privatization_ref_10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10xf32>>)
-! CHECK:          fir.do_loop
+! CHECK-NOT:      fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -557,7 +557,7 @@ subroutine acc_parallel_loop
 ! CHECK:      acc.parallel {
 ! CHECK:        [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
 ! CHECK:        acc.loop vector([[VECTORLENGTH]] : i32) {{.*}} {
-! CHECK:          fir.do_loop
+! CHECK-NOT:      fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -570,7 +570,7 @@ subroutine acc_parallel_loop
 
 ! CHECK:      acc.parallel {
 ! CHECK:        acc.loop worker() {{.*}} {
-! CHECK:          fir.do_loop
+! CHECK-NOT:      fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -584,7 +584,7 @@ subroutine acc_parallel_loop
 ! CHECK:      acc.parallel {
 ! CHECK:        [[WORKER128:%.*]] = arith.constant 128 : i32
 ! CHECK:        acc.loop worker([[WORKER128]] : i32) {{.*}} {
-! CHECK:          fir.do_loop
+! CHECK-NOT:      fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -713,6 +713,6 @@ subroutine acc_parallel_loop
   10 continue
 ! CHECK: acc.parallel
 ! CHECK: acc.loop
-! CHECK: fir.do_loop
+! CHECK-NOT: fir.do_loop
 
 end subroutine acc_parallel_loop
diff --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90
index ca504f36c368353..b5028c82380d27d 100644
--- a/flang/test/Lower/OpenACC/acc-serial-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90
@@ -374,7 +374,7 @@ subroutine acc_serial_loop
 ! CHECK:      acc.serial firstprivate(@firstprivatization_section_ext10_ref_10xf32 -> %[[ACC_FPRIVATE_B]] : !fir.ref<!fir.array<10xf32>>) {
 ! CHECK:      %[[ACC_PRIVATE_A:.*]] = acc.private varPtr(%[[DECLA]]#1 : !fir.ref<!fir.array<10xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
 ! CHECK:        acc.loop private({{.*}}@privatization_ref_10xf32 -> %[[ACC_PRIVATE_A]] : !fir.ref<!fir.array<10xf32>>)
-! CHECK:          fir.do_loop
+! CHECK-NOT:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -537,7 +537,7 @@ subroutine acc_serial_loop
 
 ! CHECK:      acc.serial {
 ! CHECK:        acc.loop {{.*}} {
-! CHECK:            fir.do_loop
+! CHECK-NOT:            fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   } attributes {collapse = 2 : i64, inclusiveUpperbound = array<i1: true, true>}
 ! CHECK:        acc.yield
@@ -639,7 +639,7 @@ subroutine acc_serial_loop
 ! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
 ! CHECK:      acc.serial dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
 ! CHECK:        acc.loop {{.*}} reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>)
-! CHECK:          fir.do_loop
+! CHECK-NOT:      fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield

>From 13b61f877314b1a7134827985c8946c942a47f99 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 29 Nov 2023 14:10:46 -0800
Subject: [PATCH 4/4] clang-format

---
 flang/lib/Lower/Bridge.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index d7e4c40ae454a53..78a5ac75a74c4fd 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2401,7 +2401,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         collapseValue = Fortran::lower::getCollapseValue(clauseList);
       } else if (accCombined) {
         const Fortran::parser::AccBeginCombinedDirective &beginCombinedDir =
-            std::get<Fortran::parser::AccBeginCombinedDirective>(accCombined->t);
+            std::get<Fortran::parser::AccBeginCombinedDirective>(
+                accCombined->t);
         const Fortran::parser::AccClauseList &clauseList =
             std::get<Fortran::parser::AccClauseList>(beginCombinedDir.t);
         collapseValue = Fortran::lower::getCollapseValue(clauseList);