[Mlir-commits] [mlir] [mlir][openacc] Update acc.loop to be a proper loop like operation (PR #67355)

Valentin Clement バレンタイン クレメン llvmlistbot at llvm.org
Mon Jan 22 10:15:28 PST 2024


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/67355

>From 4dd9f2e3bc7552dbf5f14b9014abd915ec6f91f8 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 29 Aug 2023 14:48:49 -0700
Subject: [PATCH 1/2] [mlir][openacc] Update acc.loop to be a proper loop like
 operation

The initial design of `acc.loop` was an operation that encapsulates a
loop-like operation. This was an early design; we now want `acc.loop` itself
to be a real loop-like operation, which this patch achieves by implementing
the LoopLikeOpInterface.

Differential Revision: https://reviews.llvm.org/D159229
---
 mlir/include/mlir/Dialect/OpenACC/OpenACC.h   |   1 +
 .../mlir/Dialect/OpenACC/OpenACCOps.td        |  31 +-
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       | 110 ++++--
 mlir/test/Dialect/OpenACC/canonicalize.mlir   |  10 +-
 mlir/test/Dialect/OpenACC/invalid.mlir        |  58 +++-
 mlir/test/Dialect/OpenACC/ops.mlir            | 319 ++++++++----------
 6 files changed, 299 insertions(+), 230 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
index 36daf8de235f34..bb3b9617c24edb 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h
@@ -25,6 +25,7 @@
 #include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.h.inc"
 #include "mlir/Dialect/OpenACCMPCommon/Interfaces/AtomicInterfaces.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
 #define GET_TYPEDEF_CLASSES
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 7344ab2852b9ce..f7976b87ccac79 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -14,6 +14,7 @@
 #define OPENACC_OPS
 
 include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/IR/BuiltinTypes.td"
 include "mlir/IR/EnumAttr.td"
@@ -1474,29 +1475,34 @@ def OpenACC_HostDataOp : OpenACC_Op<"host_data",
 
 def OpenACC_LoopOp : OpenACC_Op<"loop",
     [AttrSizedOperandSegments, RecursiveMemoryEffects,
-     MemoryEffects<[MemWrite<OpenACC_ConstructResource>]>]> {
+     MemoryEffects<[MemWrite<OpenACC_ConstructResource>]>,
+     DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
   let summary = "loop construct";
 
   let description = [{
-    The "acc.loop" operation represents the OpenACC loop construct.
+    The "acc.loop" operation represents the OpenACC loop construct. The lower
+    and upper bounds specify a half-open range: the range includes the lower
+    bound but does not include the upper bound. If the `inclusiveUpperbound`
+    attribute is set then the upper bound is included.
 
     Example:
 
     ```mlir
-    acc.loop gang vector {
-      scf.for %arg3 = %c0 to %c10 step %c1 {
-        scf.for %arg4 = %c0 to %c10 step %c1 {
-          scf.for %arg5 = %c0 to %c10 step %c1 {
-            // ... body
-          }
-        }
-      }
+    acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = 
+        (%c0, %c0, %c0 : index, index, index) to 
+        (%c10, %c10, %c10 : index, index, index) step 
+        (%c1, %c1, %c1 : index, index, index) {
+      // Loop body
       acc.yield
     } attributes { collapse = [3] }
     ```
   }];
 
   let arguments = (ins
+      Variadic<IntOrIndex>:$lowerbound,
+      Variadic<IntOrIndex>:$upperbound,
+      Variadic<IntOrIndex>:$step,
+      OptionalAttr<DenseBoolArrayAttr>:$inclusiveUpperbound,
       OptionalAttr<I64ArrayAttr>:$collapse,
       OptionalAttr<DeviceTypeArrayAttr>:$collapseDeviceType,
       Variadic<IntOrIndex>:$gangOperands,
@@ -1521,7 +1527,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
       OptionalAttr<SymbolRefArrayAttr>:$privatizations,
       Variadic<AnyType>:$reductionOperands,
       OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes
-      );
+  );
 
   let results = (outs Variadic<AnyType>:$results);
 
@@ -1628,7 +1634,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
         `)`
       | `cache` `(` $cacheOperands `:` type($cacheOperands) `)`
     )
-    $region
+    custom<LoopControl>($region, $lowerbound, type($lowerbound), $upperbound,
+        type($upperbound), $step, type($step))
     ( `(` type($results)^ `)` )?
     attr-dict-with-keyword
   }];
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index bdc9c345341b22..f6229e5192a0ab 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1008,17 +1008,12 @@ static ParseResult parseDeviceTypeOperandsWithKeywordOnly(
   llvm::SmallVector<mlir::Attribute> keywordOnlyDeviceTypeAttributes;
   bool needCommaBeforeOperands = false;
 
-  // Keyword only
-  if (failed(parser.parseOptionalLParen())) {
-    keywordOnlyDeviceTypeAttributes.push_back(mlir::acc::DeviceTypeAttr::get(
-        parser.getContext(), mlir::acc::DeviceType::None));
-    keywordOnlyDeviceType =
-        ArrayAttr::get(parser.getContext(), keywordOnlyDeviceTypeAttributes);
-    return success();
-  }
+  if (failed(parser.parseOptionalLParen()))
+    return failure();
 
   // Parse keyword only attributes
   if (succeeded(parser.parseOptionalLSquare())) {
+    // Parse keyword only attributes
     if (failed(parser.parseCommaSeparatedList([&]() {
           if (parser.parseAttribute(
                   keywordOnlyDeviceTypeAttributes.emplace_back()))
@@ -1029,6 +1024,13 @@ static ParseResult parseDeviceTypeOperandsWithKeywordOnly(
     if (parser.parseRSquare())
       return failure();
     needCommaBeforeOperands = true;
+  } else if (succeeded(parser.parseOptionalRParen())) {
+    // Keyword only
+    keywordOnlyDeviceTypeAttributes.push_back(mlir::acc::DeviceTypeAttr::get(
+        parser.getContext(), mlir::acc::DeviceType::None));
+    keywordOnlyDeviceType =
+        ArrayAttr::get(parser.getContext(), keywordOnlyDeviceTypeAttributes);
+    return success();
   }
 
   if (needCommaBeforeOperands && failed(parser.parseComma()))
@@ -1065,15 +1067,18 @@ static void printDeviceTypeOperandsWithKeywordOnly(
     mlir::TypeRange types, std::optional<mlir::ArrayAttr> deviceTypes,
     std::optional<mlir::ArrayAttr> keywordOnlyDeviceTypes) {
 
+  p << "(";
+
   if (operands.begin() == operands.end() && keywordOnlyDeviceTypes &&
       keywordOnlyDeviceTypes->size() == 1) {
     auto deviceTypeAttr =
         mlir::dyn_cast<mlir::acc::DeviceTypeAttr>((*keywordOnlyDeviceTypes)[0]);
-    if (deviceTypeAttr.getValue() == mlir::acc::DeviceType::None)
+    if (deviceTypeAttr.getValue() == mlir::acc::DeviceType::None) {
+      p << ")";
       return;
+    }
   }
 
-  p << "(";
   printDeviceTypes(p, keywordOnlyDeviceTypes);
   if (hasDeviceTypeValues(keywordOnlyDeviceTypes) &&
       hasDeviceTypeValues(deviceTypes))
@@ -1323,17 +1328,12 @@ static ParseResult parseGangClause(
   bool needCommaBetweenValues = false;
   bool needCommaBeforeOperands = false;
 
-  // Gang only keyword
-  if (failed(parser.parseOptionalLParen())) {
-    gangOnlyDeviceTypeAttributes.push_back(mlir::acc::DeviceTypeAttr::get(
-        parser.getContext(), mlir::acc::DeviceType::None));
-    gangOnlyDeviceType =
-        ArrayAttr::get(parser.getContext(), gangOnlyDeviceTypeAttributes);
-    return success();
-  }
+  if (failed(parser.parseOptionalLParen()))
+    return failure();
 
   // Parse gang only attributes
   if (succeeded(parser.parseOptionalLSquare())) {
+    // Parse gang only attributes
     if (failed(parser.parseCommaSeparatedList([&]() {
           if (parser.parseAttribute(
                   gangOnlyDeviceTypeAttributes.emplace_back()))
@@ -1344,6 +1344,13 @@ static ParseResult parseGangClause(
     if (parser.parseRSquare())
       return failure();
     needCommaBeforeOperands = true;
+  } else if (succeeded(parser.parseOptionalRParen())) {
+    // Gang only keyword
+    gangOnlyDeviceTypeAttributes.push_back(mlir::acc::DeviceTypeAttr::get(
+        parser.getContext(), mlir::acc::DeviceType::None));
+    gangOnlyDeviceType =
+        ArrayAttr::get(parser.getContext(), gangOnlyDeviceTypeAttributes);
+    return success();
   }
 
   auto argNum = mlir::acc::GangArgTypeAttr::get(parser.getContext(),
@@ -1443,16 +1450,18 @@ void printGangClause(OpAsmPrinter &p, Operation *op,
                      std::optional<mlir::DenseI32ArrayAttr> segments,
                      std::optional<mlir::ArrayAttr> gangOnlyDeviceTypes) {
 
+  p << "(";
   if (operands.begin() == operands.end() &&
       hasDeviceTypeValues(gangOnlyDeviceTypes) &&
       gangOnlyDeviceTypes->size() == 1) {
     auto deviceTypeAttr =
         mlir::dyn_cast<mlir::acc::DeviceTypeAttr>((*gangOnlyDeviceTypes)[0]);
-    if (deviceTypeAttr.getValue() == mlir::acc::DeviceType::None)
+    if (deviceTypeAttr.getValue() == mlir::acc::DeviceType::None) {
+      p << ")";
       return;
+    }
   }
 
-  p << "(";
   printDeviceTypes(p, gangOnlyDeviceTypes);
 
   if (hasDeviceTypeValues(gangOnlyDeviceTypes) &&
@@ -1516,6 +1525,11 @@ LogicalResult checkDeviceTypes(mlir::ArrayAttr deviceTypes) {
 }
 
 LogicalResult acc::LoopOp::verify() {
+  if (!getUpperbound().empty() && getInclusiveUpperbound() &&
+      (getUpperbound().size() != getInclusiveUpperbound()->size()))
+    return emitError() << "inclusiveUpperbound size is expected to be the same"
+                       << " as upperbound size";
+
   // Check collapse
   if (getCollapseAttr() && !getCollapseDeviceTypeAttr())
     return emitOpError() << "collapse device_type attr must be define when"
@@ -1629,7 +1643,9 @@ unsigned LoopOp::getNumDataOperands() {
 }
 
 Value LoopOp::getDataOperand(unsigned i) {
-  unsigned numOptional = getGangOperands().size();
+  unsigned numOptional =
+      getLowerbound().size() + getUpperbound().size() + getStep().size();
+  numOptional += getGangOperands().size();
   numOptional += getVectorOperands().size();
   numOptional += getWorkerNumOperands().size();
   numOptional += getTileOperands().size();
@@ -1748,6 +1764,75 @@ bool LoopOp::hasGang(mlir::acc::DeviceType deviceType) {
   return hasDeviceType(getGang(), deviceType);
 }
 
+/// LoopLikeOpInterface hook: reports the op's region as its only loop region.
+llvm::SmallVector<mlir::Region *> acc::LoopOp::getLoopRegions() {
+  return {&getRegion()};
+}
+
+/// Parses the optional loop control followed by the body region:
+///
+///   loop-control ::= `(` ssa-id-and-type-list `)` `=`
+///                    `(` ssa-use-list `:` type-list `)` `to`
+///                    `(` ssa-use-list `:` type-list `)` `step`
+///                    `(` ssa-use-list `:` type-list `)`
+///
+/// When the leading `(` is absent no bounds are parsed and the region is
+/// parsed without entry block arguments. Otherwise the lower bound, upper
+/// bound and step lists are each parsed with a required operand count equal
+/// to the number of induction variables, which are then passed to the region
+/// parser as its arguments.
+ParseResult
+parseLoopControl(OpAsmParser &parser, Region &region,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerbound,
+                 SmallVectorImpl<Type> &lowerboundType,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperbound,
+                 SmallVectorImpl<Type> &upperboundType,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &step,
+                 SmallVectorImpl<Type> &stepType) {
+
+  SmallVector<OpAsmParser::Argument> inductionVars;
+  if (succeeded(parser.parseOptionalLParen())) {
+    if (parser.parseArgumentList(inductionVars, OpAsmParser::Delimiter::None,
+                                 /*allowType=*/true) ||
+        parser.parseRParen() || parser.parseEqual() || parser.parseLParen() ||
+        parser.parseOperandList(lowerbound, inductionVars.size(),
+                                OpAsmParser::Delimiter::None) ||
+        parser.parseColonTypeList(lowerboundType) || parser.parseRParen() ||
+        parser.parseKeyword("to") || parser.parseLParen() ||
+        parser.parseOperandList(upperbound, inductionVars.size(),
+                                OpAsmParser::Delimiter::None) ||
+        parser.parseColonTypeList(upperboundType) || parser.parseRParen() ||
+        parser.parseKeyword("step") || parser.parseLParen() ||
+        parser.parseOperandList(step, inductionVars.size(),
+                                OpAsmParser::Delimiter::None) ||
+        parser.parseColonTypeList(stepType) || parser.parseRParen())
+      return failure();
+  }
+  return parser.parseRegion(region, inductionVars);
+}
+
+/// Prints the loop control, if any, followed by the body region. Mirrors
+/// parseLoopControl: the control is omitted when the entry block has no
+/// arguments, and the region is printed without its entry block arguments
+/// because they are already shown as the induction variables.
+void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
+                      ValueRange lowerbound, TypeRange lowerboundType,
+                      ValueRange upperbound, TypeRange upperboundType,
+                      ValueRange steps, TypeRange stepType) {
+  ValueRange regionArgs = region.front().getArguments();
+  if (!regionArgs.empty()) {
+    p << "(";
+    llvm::interleaveComma(regionArgs, p,
+                          [&p](Value v) { p << v << " : " << v.getType(); });
+    // Every separator literal carries its own spacing so that exactly one
+    // space is emitted between a closing `)` and the keyword that follows.
+    p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
+      << upperbound << " : " << upperboundType << ") step (" << steps
+      << " : " << stepType << ") ";
+  }
+  p.printRegion(region, /*printEntryBlockArgs=*/false);
+}
+
 //===----------------------------------------------------------------------===//
 // DataOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenACC/canonicalize.mlir b/mlir/test/Dialect/OpenACC/canonicalize.mlir
index 6173ab6699c6c5..4522ffb252a62e 100644
--- a/mlir/test/Dialect/OpenACC/canonicalize.mlir
+++ b/mlir/test/Dialect/OpenACC/canonicalize.mlir
@@ -110,14 +110,16 @@ func.func @testupdateop(%a: memref<f32>, %ifCond: i1) -> () {
 
 func.func @testhostdataop(%a: memref<f32>, %ifCond: i1) -> () {
   %0 = acc.use_device varPtr(%a : memref<f32>) -> memref<f32>
+  %1 = arith.constant 1 : i32
+  %2 = arith.constant 10 : i32
   %false = arith.constant false
   acc.host_data dataOperands(%0 : memref<f32>) if(%false) {
-    acc.loop {
+    acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
       acc.yield
-    }
-    acc.loop {
+    } attributes { inclusiveUpperbound = array<i1: true> }
+    acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
       acc.yield
-    }
+    } attributes { inclusiveUpperbound = array<i1: true> }
     acc.terminator
   }
   return
diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir
index 5dcdb3a37e4e3b..57ae5856149d11 100644
--- a/mlir/test/Dialect/OpenACC/invalid.mlir
+++ b/mlir/test/Dialect/OpenACC/invalid.mlir
@@ -1,53 +1,67 @@
 // RUN: mlir-opt -split-input-file -verify-diagnostics %s
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop {
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 } attributes {seq = [#acc.device_type<none>], gang = [#acc.device_type<none>]}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop {
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 } attributes {seq = [#acc.device_type<none>], worker = [#acc.device_type<none>]}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop {
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 } attributes {seq = [#acc.device_type<none>], vector = [#acc.device_type<none>]}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop {
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 } attributes {seq = [#acc.device_type<none>], worker = [#acc.device_type<none>], gang = [#acc.device_type<none>]}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop {
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 } attributes {seq = [#acc.device_type<none>], vector = [#acc.device_type<none>], gang = [#acc.device_type<none>]}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}}
-acc.loop {
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 } attributes {seq = [#acc.device_type<none>], vector = [#acc.device_type<none>], worker = [#acc.device_type<none>]}
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}}
 acc.loop {
   "test.openacc_dummy_op"() : () -> ()
@@ -83,10 +97,12 @@ acc.loop {
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{only one of "auto", "independent", "seq" can be present at the same time}}
-acc.loop {
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   acc.yield
-} attributes {auto_ = [#acc.device_type<none>], seq = [#acc.device_type<none>]}
+} attributes {auto_ = [#acc.device_type<none>], seq = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
 
 // -----
 
@@ -154,11 +170,13 @@ acc.parallel {
 
 // -----
 
-acc.loop {
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32){
 // expected-error@+1 {{'acc.init' op cannot be nested in a compute operation}}
   acc.init
   acc.yield
-}
+} attributes {inclusiveUpperbound = array<i1: true>}
 
 // -----
 
@@ -170,21 +188,25 @@ acc.parallel {
 
 // -----
 
-acc.loop {
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
 // expected-error@+1 {{'acc.shutdown' op cannot be nested in a compute operation}}
   acc.shutdown
   acc.yield
-}
+} attributes {inclusiveUpperbound = array<i1: true>}
 
 // -----
 
-acc.loop {
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
+acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() ({
     // expected-error@+1 {{'acc.shutdown' op cannot be nested in a compute operation}}
     acc.shutdown
   }) : () -> ()
   acc.yield
-}
+} attributes {inclusiveUpperbound = array<i1: true>}
 
 // -----
 
@@ -388,8 +410,10 @@ acc.firstprivate.recipe @privatization_i32 : i32 init {
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{expected ')'}}
-acc.loop gang({static=%i64Value: i64, num=%i64Value: i64} {
+acc.loop gang({static=%i64Value: i64, num=%i64Value: i64} (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 }
@@ -457,8 +481,10 @@ acc.reduction.recipe @reduction_i64 : i64 reduction_operator<add> init {
 
 // -----
 
+%1 = arith.constant 1 : i32
+%2 = arith.constant 10 : i32
 // expected-error@+1 {{new value expected after comma}}
-acc.loop gang({static=%i64Value: i64, ) {
+acc.loop gang({static=%i64Value: i64, ) (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) {
   "test.openacc_dummy_op"() : () -> ()
   acc.yield
 }
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index bda31a19cf5cd8..d4c884a837f875 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -11,52 +11,40 @@ func.func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
   %async = arith.constant 1 : i64
 
   acc.parallel async(%async: i64) {
-    acc.loop gang vector {
-      scf.for %arg3 = %c0 to %c10 step %c1 {
-        scf.for %arg4 = %c0 to %c10 step %c1 {
-          scf.for %arg5 = %c0 to %c10 step %c1 {
-            %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32>
-            %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32>
-            %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32>
-            %p = arith.mulf %a, %b : f32
-            %co = arith.addf %cij, %p : f32
-            memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32>
-          }
-        }
-      }
+    acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = (%c0, %c0, %c0 : index, index, index) to (%c10, %c10, %c10 : index, index, index) step (%c1, %c1, %c1 : index, index, index) {
+      %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32>
+      %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32>
+      %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32>
+      %p = arith.mulf %a, %b : f32
+      %co = arith.addf %cij, %p : f32
+      memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32>
       acc.yield
-    } attributes { collapse = [3], collapseDeviceType = [#acc.device_type<none>]}
+    } attributes { collapse = [3], collapseDeviceType = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true, true>}
     acc.yield
   }
 
   return %C : memref<10x10xf32>
 }
 
-// CHECK-LABEL: func @compute1(
+// CHECK-LABEL: func @compute1
 //  CHECK-NEXT:   %{{.*}} = arith.constant 0 : index
 //  CHECK-NEXT:   %{{.*}} = arith.constant 10 : index
 //  CHECK-NEXT:   %{{.*}} = arith.constant 1 : index
 //  CHECK-NEXT:   [[ASYNC:%.*]] = arith.constant 1 : i64
 //  CHECK-NEXT:   acc.parallel async([[ASYNC]] : i64) {
-//  CHECK-NEXT:     acc.loop gang vector {
-//  CHECK-NEXT:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:         scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:           scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32
-//  CHECK-NEXT:             %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
-//  CHECK-NEXT:             memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:           }
-//  CHECK-NEXT:         }
-//  CHECK-NEXT:       }
+//  CHECK-NEXT:     acc.loop gang() vector() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
+//  CHECK-NEXT:       %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:       %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:       %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:       %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32
+//  CHECK-NEXT:       %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
+//  CHECK-NEXT:       memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
 //  CHECK-NEXT:       acc.yield
-//  CHECK-NEXT:     } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>]}
+//  CHECK-NEXT:     } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true, true>}
 //  CHECK-NEXT:     acc.yield
 //  CHECK-NEXT:   }
 //  CHECK-NEXT:   return %{{.*}} : memref<10x10xf32>
-//  CHECK-NEXT: }
+
 
 // -----
 
@@ -66,21 +54,19 @@ func.func @compute2(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
   %c1 = arith.constant 1 : index
 
   acc.parallel {
-    acc.loop {
-      scf.for %arg3 = %c0 to %c10 step %c1 {
-        scf.for %arg4 = %c0 to %c10 step %c1 {
-          scf.for %arg5 = %c0 to %c10 step %c1 {
-            %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32>
-            %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32>
-            %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32>
-            %p = arith.mulf %a, %b : f32
-            %co = arith.addf %cij, %p : f32
-            memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32>
-          }
+    acc.loop (%arg3 : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
+      scf.for %arg4 = %c0 to %c10 step %c1 {
+        scf.for %arg5 = %c0 to %c10 step %c1 {
+          %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32>
+          %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32>
+          %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32>
+          %p = arith.mulf %a, %b : f32
+          %co = arith.addf %cij, %p : f32
+          memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32>
         }
       }
       acc.yield
-    } attributes {seq = [#acc.device_type<none>]}
+    } attributes {seq = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
     acc.yield
   }
 
@@ -92,21 +78,19 @@ func.func @compute2(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
 //  CHECK-NEXT:   %{{.*}} = arith.constant 10 : index
 //  CHECK-NEXT:   %{{.*}} = arith.constant 1 : index
 //  CHECK-NEXT:   acc.parallel {
-//  CHECK-NEXT:     acc.loop {
+//  CHECK-NEXT:     acc.loop  (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
 //  CHECK-NEXT:       scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
 //  CHECK-NEXT:         scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:           scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:             %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32
-//  CHECK-NEXT:             %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
-//  CHECK-NEXT:             memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-//  CHECK-NEXT:           }
+//  CHECK-NEXT:           %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:           %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:           %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+//  CHECK-NEXT:           %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32
+//  CHECK-NEXT:           %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
+//  CHECK-NEXT:           memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
 //  CHECK-NEXT:         }
 //  CHECK-NEXT:       }
 //  CHECK-NEXT:       acc.yield
-//  CHECK-NEXT:     } attributes {seq = [#acc.device_type<none>]}
+//  CHECK-NEXT:     } attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<none>]}
 //  CHECK-NEXT:     acc.yield
 //  CHECK-NEXT:   }
 //  CHECK-NEXT:   return %{{.*}} : memref<10x10xf32>
@@ -138,32 +122,26 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
   acc.data dataOperands(%pa, %pb, %pc, %pd: memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) {
     %private = acc.private varPtr(%c : memref<10xf32>) -> memref<10xf32>
     acc.parallel num_gangs({%numGangs: i64}) num_workers(%numWorkers: i64 [#acc.device_type<nvidia>]) private(@privatization_memref_10_f32 -> %private : memref<10xf32>) {
-      acc.loop gang {
-        scf.for %x = %lb to %c10 step %st {
-          acc.loop worker {
-            scf.for %y = %lb to %c10 step %st {
-              %axy = memref.load %a[%x, %y] : memref<10x10xf32>
-              %bxy = memref.load %b[%x, %y] : memref<10x10xf32>
-              %tmp = arith.addf %axy, %bxy : f32
-              memref.store %tmp, %c[%y] : memref<10xf32>
-            }
-            acc.yield
-          }
-
-          acc.loop {
-            // for i = 0 to 10 step 1
-            //   d[x] += c[i]
-            scf.for %i = %lb to %c10 step %st {
-              %ci = memref.load %c[%i] : memref<10xf32>
-              %dx = memref.load %d[%x] : memref<10xf32>
-              %z = arith.addf %ci, %dx : f32
-              memref.store %z, %d[%x] : memref<10xf32>
-            }
-            acc.yield
-          } attributes {seq = [#acc.device_type<none>]}
-        }
+      acc.loop gang() (%x : index) = (%lb : index) to (%c10 : index) step (%st : index) {
+        acc.loop worker() (%y : index) = (%lb : index) to (%c10 : index) step (%st : index) {
+          %axy = memref.load %a[%x, %y] : memref<10x10xf32>
+          %bxy = memref.load %b[%x, %y] : memref<10x10xf32>
+          %tmp = arith.addf %axy, %bxy : f32
+          memref.store %tmp, %c[%y] : memref<10xf32>
+          acc.yield
+        } attributes {inclusiveUpperbound = array<i1: true>}
+
+        acc.loop (%i : index) = (%lb : index) to (%c10 : index) step (%st : index) {
+          // for i = 0 to 10 step 1
+          //   d[x] += c[i]
+          %ci = memref.load %c[%i] : memref<10xf32>
+          %dx = memref.load %d[%x] : memref<10xf32>
+          %z = arith.addf %ci, %dx : f32
+          memref.store %z, %d[%x] : memref<10xf32>
+          acc.yield
+        } attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<nvidia>]}
         acc.yield
-      }
+      } attributes {inclusiveUpperbound = array<i1: true>}
       acc.yield
     }
     acc.terminator
@@ -181,29 +159,23 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
 // CHECK:        acc.data dataOperands(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) {
 // CHECK-NEXT:     %[[P_ARG2:.*]] = acc.private varPtr([[ARG2]] : memref<10xf32>) -> memref<10xf32> 
 // CHECK-NEXT:     acc.parallel num_gangs({[[NUMGANG]] : i64}) num_workers([[NUMWORKERS]] : i64 [#acc.device_type<nvidia>]) private(@privatization_memref_10_f32 -> %[[P_ARG2]] : memref<10xf32>) {
-// CHECK-NEXT:       acc.loop gang {
-// CHECK-NEXT:         scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
-// CHECK-NEXT:           acc.loop worker {
-// CHECK-NEXT:             scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
-// CHECK-NEXT:               %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-// CHECK-NEXT:               %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
-// CHECK-NEXT:               %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
-// CHECK-NEXT:               memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
-// CHECK-NEXT:             }
-// CHECK-NEXT:             acc.yield
-// CHECK-NEXT:           }
-// CHECK-NEXT:           acc.loop {
-// CHECK-NEXT:             scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
-// CHECK-NEXT:               %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32>
-// CHECK-NEXT:               %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32>
-// CHECK-NEXT:               %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
-// CHECK-NEXT:               memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
-// CHECK-NEXT:             }
-// CHECK-NEXT:             acc.yield
-// CHECK-NEXT:           } attributes {seq = [#acc.device_type<none>]}
-// CHECK-NEXT:         }
+// CHECK-NEXT:       acc.loop gang() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
+// CHECK-NEXT:         acc.loop worker() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
+// CHECK-NEXT:           %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+// CHECK-NEXT:           %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+// CHECK-NEXT:           %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
+// CHECK-NEXT:           memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
+// CHECK-NEXT:           acc.yield
+// CHECK-NEXT:         } attributes {inclusiveUpperbound = array<i1: true>}
+// CHECK-NEXT:         acc.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
+// CHECK-NEXT:           %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32>
+// CHECK-NEXT:           %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32>
+// CHECK-NEXT:           %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
+// CHECK-NEXT:           memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
+// CHECK-NEXT:           acc.yield
+// CHECK-NEXT:         } attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<nvidia>]}
 // CHECK-NEXT:         acc.yield
-// CHECK-NEXT:       }
+// CHECK-NEXT:       } attributes {inclusiveUpperbound = array<i1: true>}
 // CHECK-NEXT:       acc.yield
 // CHECK-NEXT:     }
 // CHECK-NEXT:     acc.terminator
@@ -217,171 +189,161 @@ func.func @testloopop(%a : memref<10xf32>) -> () {
   %i64Value = arith.constant 1 : i64
   %i32Value = arith.constant 128 : i32
   %idxValue = arith.constant 8 : index
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
 
-  acc.loop gang vector worker {
+  acc.loop gang() vector() worker() (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang({num=%i64Value: i64}) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang({num=%i64Value: i64}) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang({static=%i64Value: i64}) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang({static=%i64Value: i64}) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop worker(%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop worker(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop worker(%i32Value: i32) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop worker(%i32Value: i32) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop worker(%idxValue: index) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop worker(%idxValue: index) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop vector(%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop vector(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop vector(%i32Value: i32) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop vector(%i32Value: i32) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop vector(%idxValue: index) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop vector(%idxValue: index) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang({num=%i64Value: i64}) worker vector {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang({num=%i64Value: i64}) worker() vector() (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang({num=%i64Value: i64, static=%i64Value: i64}) worker(%i64Value: i64) vector(%i64Value: i64) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang({num=%i64Value: i64, static=%i64Value: i64}) worker(%i64Value: i64) vector(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang({num=%i32Value: i32, static=%idxValue: index}) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang({num=%i32Value: i32, static=%idxValue: index}) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop tile({%i64Value : i64, %i64Value : i64}) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop tile({%i64Value : i64, %i64Value : i64}) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop tile({%i32Value : i32, %i32Value : i32}) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop tile({%i32Value : i32, %i32Value : i32}) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang({static=%i64Value: i64, num=%i64Value: i64}) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang({static=%i64Value: i64, num=%i64Value: i64}) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
-  acc.loop gang({dim=%i64Value : i64, static=%i64Value: i64}) {
+  } attributes {inclusiveUpperbound = array<i1: true>}
+  acc.loop gang({dim=%i64Value : i64, static=%i64Value: i64}) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
+  } attributes {inclusiveUpperbound = array<i1: true>}
   %b = acc.cache varPtr(%a : memref<10xf32>) -> memref<10xf32>
-  acc.loop cache(%b : memref<10xf32>) {
+  acc.loop cache(%b : memref<10xf32>) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
     "test.openacc_dummy_op"() : () -> ()
     acc.yield
-  }
+  } attributes {inclusiveUpperbound = array<i1: true>}
   return
 }
 
 // CHECK:      [[I64VALUE:%.*]] = arith.constant 1 : i64
 // CHECK-NEXT: [[I32VALUE:%.*]] = arith.constant 128 : i32
 // CHECK-NEXT: [[IDXVALUE:%.*]] = arith.constant 8 : index
-// CHECK:      acc.loop gang worker vector {
+// CHECK:      acc.loop
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang({num=[[I64VALUE]] : i64}) {
+// CHECK-NEXT: attributes {inclusiveUpperbound = array<i1: true>}
+// CHECK:      acc.loop gang({num=[[I64VALUE]] : i64})
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang({static=[[I64VALUE]] : i64}) {
+// CHECK:      acc.loop gang({static=[[I64VALUE]] : i64})
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop worker([[I64VALUE]] : i64) {
+// CHECK:      acc.loop worker([[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop worker([[I32VALUE]] : i32) {
+// CHECK:      acc.loop worker([[I32VALUE]] : i32)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop worker([[IDXVALUE]] : index) {
+// CHECK:      acc.loop worker([[IDXVALUE]] : index)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop vector([[I64VALUE]] : i64) {
+// CHECK:      acc.loop vector([[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop vector([[I32VALUE]] : i32) {
+// CHECK:      acc.loop vector([[I32VALUE]] : i32)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop vector([[IDXVALUE]] : index) {
+// CHECK:      acc.loop vector([[IDXVALUE]] : index)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang({num=[[I64VALUE]] : i64}) worker vector {
+// CHECK:      acc.loop gang({num=[[I64VALUE]] : i64}) worker() vector()
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang({num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64}) worker([[I64VALUE]] : i64) vector([[I64VALUE]] : i64) {
+// CHECK:      acc.loop gang({num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64}) worker([[I64VALUE]] : i64) vector([[I64VALUE]] : i64)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang({num=[[I32VALUE]] : i32, static=[[IDXVALUE]] : index}) {
+// CHECK:      acc.loop gang({num=[[I32VALUE]] : i32, static=[[IDXVALUE]] : index})
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop tile({[[I64VALUE]] : i64, [[I64VALUE]] : i64}) {
+// CHECK:      acc.loop tile({[[I64VALUE]] : i64, [[I64VALUE]] : i64})
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop tile({[[I32VALUE]] : i32, [[I32VALUE]] : i32}) {
+// CHECK:      acc.loop tile({[[I32VALUE]] : i32, [[I32VALUE]] : i32})
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang({static=[[I64VALUE]] : i64, num=[[I64VALUE]] : i64}) {
+// CHECK:      acc.loop gang({static=[[I64VALUE]] : i64, num=[[I64VALUE]] : i64})
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
-// CHECK:      acc.loop gang({dim=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64}) {
+// CHECK:      acc.loop gang({dim=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64})
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
 // CHECK:      %{{.*}} = acc.cache varPtr(%{{.*}} : memref<10xf32>) -> memref<10xf32>
-// CHECK-NEXT: acc.loop cache(%{{.*}} : memref<10xf32>) {
+// CHECK-NEXT: acc.loop cache(%{{.*}} : memref<10xf32>)
 // CHECK-NEXT:   "test.openacc_dummy_op"() : () -> ()
 // CHECK-NEXT:   acc.yield
-// CHECK-NEXT: }
 
 // -----
 
 func.func @acc_loop_multiple_block() {
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
   acc.parallel {
-    acc.loop {
-      %c1 = arith.constant 1 : index
-      cf.br ^bb1(%c1 : index)
+    acc.loop  (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
+      %c1_1 = arith.constant 1 : index
+      cf.br ^bb1(%c1_1 : index)
     ^bb1(%9: index):
-      %c0 = arith.constant 0 : index
-      %12 = arith.cmpi sgt, %9, %c0 : index
+      %c0_1 = arith.constant 0 : index
+      %12 = arith.cmpi sgt, %9, %c0_1 : index
       cf.cond_br %12, ^bb2, ^bb3
     ^bb2:
       %c1_0 = arith.constant 1 : index
-      %c10 = arith.constant 10 : index
-      %22 = arith.subi %c10, %c1_0 : index
+      %c10_1 = arith.constant 10 : index
+      %22 = arith.subi %c10_1, %c1_0 : index
       cf.br ^bb1(%22 : index)
     ^bb3:
       acc.yield
-    }
+    } attributes {inclusiveUpperbound = array<i1: true>}
     acc.yield
   }
   return
@@ -1509,10 +1471,13 @@ acc.reduction.recipe @reduction_add_i64 : i64 reduction_operator<add> init {
 // CHECK:       }
 
 func.func @acc_reduc_test(%a : i64) -> () {
+  %c0 = arith.constant 0 : index
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
   acc.parallel reduction(@reduction_add_i64 -> %a : i64) {
-    acc.loop reduction(@reduction_add_i64 -> %a : i64) {
+    acc.loop reduction(@reduction_add_i64 -> %a : i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
       acc.yield
-    }
+    } attributes { inclusiveUpperbound = array<i1: true> }
     acc.yield
   }
   return

>From 0b407faf727df8d2a64bdca8b15d1ffe7e533829 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 19 Jan 2024 12:00:45 -0800
Subject: [PATCH 2/2] Add getBody

---
 mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index f7976b87ccac79..992f2809644a6a 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -1545,6 +1545,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
     /// The i-th data operand passed.
     Value getDataOperand(unsigned i);
 
+    Block &getBody() { return getLoopRegions().front()->front(); }
+
     /// Return true if the op has the auto attribute for the
     /// mlir::acc::DeviceType::None device_type.
     bool hasAuto();



More information about the Mlir-commits mailing list