[Mlir-commits] [mlir] [mlir][acc] Add loop tiling utilities for OpenACC (PR #171490)

Razvan Lupusoru llvmlistbot at llvm.org
Tue Dec 9 11:04:49 PST 2025


https://github.com/razvanlupusoru updated https://github.com/llvm/llvm-project/pull/171490

>From f0bc575e73a634f3058d6317ecca0ab6e0c4ebad Mon Sep 17 00:00:00 2001
From: Vijay Kandiah <vkandiah at nvidia.com>
Date: Tue, 9 Dec 2025 10:55:29 -0800
Subject: [PATCH 1/5] [mlir][acc] Add loop tiling utilities for OpenACC

Add utilities in OpenACCUtilsTiling.h/.cpp to support tiling
transformations on acc.loop operations:

- uncollapseLoops: Expand collapsed loops with multiple IVs into
  nested loop structures when tile count exceeds collapse count
- tileACCLoops: Transform loop nests into tile and element loops
  based on provided tile sizes, with automatic resolution of
  unknown tile sizes (tile(*) represented as -1)

These utilities prepare for the ACCLoopTiling pass which handles
the OpenACC loop tile directive.
---
 .../mlir/Dialect/OpenACC/OpenACCUtilsTiling.h |  83 +++++
 mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt |   3 +
 .../OpenACC/Utils/OpenACCUtilsTiling.cpp      | 313 ++++++++++++++++
 mlir/unittests/Dialect/OpenACC/CMakeLists.txt |   1 +
 .../OpenACC/OpenACCUtilsTilingTest.cpp        | 349 ++++++++++++++++++
 5 files changed, 749 insertions(+)
 create mode 100644 mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h
 create mode 100644 mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp
 create mode 100644 mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h
new file mode 100644
index 0000000000000..3152526cc0582
--- /dev/null
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h
@@ -0,0 +1,83 @@
+//===- OpenACCUtilsTiling.h - OpenACC Loop Tiling Utilities -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for tiling OpenACC loops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_OPENACC_OPENACCUTILSTILING_H_
+#define MLIR_DIALECT_OPENACC_OPENACCUTILSTILING_H_
+
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/PatternMatch.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace mlir {
+namespace acc {
+
+/// Uncollapse tile loops with multiple IVs and collapseCount < tileCount.
+/// This is used to prepare loops for tiling when the collapse count is less
+/// than the tile count.
+///
+/// \param origLoop The original loop operation to uncollapse.
+/// \param tileCount The number of tile dimensions.
+/// \param collapseCount The collapse count from the original loop.
+/// \param rewriter The rewriter to use for modifications.
+/// \return A vector of uncollapsed loop operations.
+llvm::SmallVector<mlir::acc::LoopOp>
+uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount,
+                unsigned collapseCount, mlir::RewriterBase &rewriter);
+
+/// Tile ACC loops according to the given tile sizes.
+///
+/// Tiling a 2-level nested loop will create two 'tile' loops containing two
+/// 'element' loops. The transformation looks like:
+///
+/// Before Tiling:
+/// \code
+/// #pragma acc loop tile(tile_size1, tile_size2)
+///  for (i = lb1; i < ub1; i += step1) { // original loop
+///    for (j = lb2; j < ub2; j += step2) {
+///      a[i,j] = i + j;
+///    }
+///  }
+/// \endcode
+///
+/// After Tiling:
+/// \code
+///  for (i = lb1; i < ub1; i += (step1 * tile_size1)) { // tile loop 1
+///    for (j = lb2; j < ub2; j += (step2 * tile_size2)) { // tile loop 2
+///      for (ii = i; ii < min(ub1, (step1 * tile_size1) + i); ii += step1) {
+///      // element loop 1
+///        for (jj = j; jj < min(ub2, (step2 * tile_size2) + j); jj += step2)
+///        { // element loop 2
+///          a[ii,jj] = i + j;
+///        }
+///      }
+///    }
+///  }
+/// \endcode
+///
+/// Unknown tile sizes (represented as -1 in OpenACC for `tile(*)`) are
+/// resolved to the provided default tile size.
+///
+/// \param tileLoops The loops to tile (outermost first).
+/// \param tileSizes The tile sizes for each dimension. Values of -1 are
+///        treated as unknown and resolved to defaultTileSize.
+/// \param defaultTileSize The default tile size to use for unknown (*) tiles.
+/// \param rewriter The rewriter to use for modifications.
+/// \return The outermost loop after tiling.
+mlir::acc::LoopOp tileACCLoops(llvm::SmallVector<mlir::acc::LoopOp> &tileLoops,
+                               const llvm::SmallVector<mlir::Value> &tileSizes,
+                               int32_t defaultTileSize,
+                               mlir::RewriterBase &rewriter);
+
+} // namespace acc
+} // namespace mlir
+
+#endif // MLIR_DIALECT_OPENACC_OPENACCUTILSTILING_H_
diff --git a/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt
index 68e124625921f..c3de4f7e3e282 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_dialect_library(MLIROpenACCUtils
+  OpenACCUtilsTiling.cpp
   OpenACCUtils.cpp
 
   ADDITIONAL_HEADER_DIRS
@@ -14,7 +15,9 @@ add_mlir_dialect_library(MLIROpenACCUtils
   MLIROpenACCTypeInterfacesIncGen
 
   LINK_LIBS PUBLIC
+  MLIRArithDialect
   MLIROpenACCDialect
   MLIRIR
   MLIRSupport
+  MLIRTransformUtils
 )
diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp
new file mode 100644
index 0000000000000..f939ec1c58cfd
--- /dev/null
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp
@@ -0,0 +1,313 @@
+//===- OpenACCUtilsTiling.cpp - OpenACC Loop Tiling Utilities -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for tiling OpenACC loops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenACC/OpenACCUtilsTiling.h"
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/Transforms/RegionUtils.h"
+
+// Resolve unknown tile sizes (represented as -1 for tile(*)) to the default.
+static mlir::Value resolveUnknownTileSize(mlir::Value tileSize,
+                                          int32_t defaultTileSize,
+                                          mlir::RewriterBase &rewriter,
+                                          mlir::Location loc) {
+  auto constVal = mlir::getConstantIntValue(tileSize);
+  if (constVal && *constVal < 0) {
+    return mlir::arith::ConstantOp::create(
+        rewriter, loc, rewriter.getI32Type(),
+        rewriter.getI32IntegerAttr(defaultTileSize));
+  }
+  return tileSize;
+}
+
+// Remove vector/worker attributes from loop
+static void removeWorkerVectorFromLoop(mlir::acc::LoopOp loop) {
+  if (loop.hasVector() || loop.getVectorValue()) {
+    loop.removeVectorAttr();
+    loop.removeVectorOperandsDeviceTypeAttr();
+  } else if (loop.hasWorker() || loop.getWorkerValue()) {
+    loop.removeWorkerAttr();
+    loop.removeWorkerNumOperandsDeviceTypeAttr();
+  }
+}
+
+// Create a new ACC loop with new steps, lb, ub from original loop
+static mlir::acc::LoopOp
+createACCLoopFromOriginal(mlir::acc::LoopOp origLoop,
+                          mlir::RewriterBase &rewriter, mlir::ValueRange lb,
+                          mlir::ValueRange ub, mlir::ValueRange step,
+                          mlir::DenseBoolArrayAttr inclusiveUBAttr,
+                          mlir::acc::CombinedConstructsTypeAttr combinedAttr,
+                          mlir::Location loc, bool preserveCollapse) {
+  mlir::ArrayAttr collapseAttr = mlir::ArrayAttr{};
+  mlir::ArrayAttr collapseDeviceTypeAttr = mlir::ArrayAttr{};
+  if (preserveCollapse) {
+    collapseAttr = origLoop.getCollapseAttr();
+    collapseDeviceTypeAttr = origLoop.getCollapseDeviceTypeAttr();
+  }
+  auto newLoop = mlir::acc::LoopOp::create(
+      rewriter, loc, origLoop->getResultTypes(), lb, ub, step, inclusiveUBAttr,
+      collapseAttr, collapseDeviceTypeAttr, origLoop.getGangOperands(),
+      origLoop.getGangOperandsArgTypeAttr(),
+      origLoop.getGangOperandsSegmentsAttr(),
+      origLoop.getGangOperandsDeviceTypeAttr(), origLoop.getWorkerNumOperands(),
+      origLoop.getWorkerNumOperandsDeviceTypeAttr(),
+      origLoop.getVectorOperands(), origLoop.getVectorOperandsDeviceTypeAttr(),
+      origLoop.getSeqAttr(), origLoop.getIndependentAttr(),
+      origLoop.getAuto_Attr(), origLoop.getGangAttr(), origLoop.getWorkerAttr(),
+      origLoop.getVectorAttr(), mlir::ValueRange{}, mlir::DenseI32ArrayAttr{},
+      mlir::ArrayAttr{}, origLoop.getCacheOperands(),
+      origLoop.getPrivateOperands(), origLoop.getFirstprivateOperands(),
+      origLoop.getReductionOperands(), combinedAttr);
+  return newLoop;
+}
+
+// Create inner loop inside input loop
+static mlir::acc::LoopOp
+createInnerLoop(mlir::acc::LoopOp inputLoop, mlir::RewriterBase &rewriter,
+                mlir::ValueRange lb, mlir::ValueRange ub, mlir::ValueRange step,
+                mlir::DenseBoolArrayAttr inclusiveUBAttr, mlir::Location loc) {
+  mlir::acc::LoopOp elementLoop = createACCLoopFromOriginal(
+      inputLoop, rewriter, lb, ub, step, inclusiveUBAttr,
+      mlir::acc::CombinedConstructsTypeAttr{}, loc, /*preserveCollapse*/ false);
+
+  // Remove gang/worker attributes from inner loops
+  rewriter.startOpModification(elementLoop);
+  if (inputLoop.hasGang() ||
+      inputLoop.getGangValue(mlir::acc::GangArgType::Num) ||
+      inputLoop.getGangValue(mlir::acc::GangArgType::Dim) ||
+      inputLoop.getGangValue(mlir::acc::GangArgType::Static)) {
+    elementLoop.removeGangAttr();
+    elementLoop.removeGangOperandsArgTypeAttr();
+    elementLoop.removeGangOperandsSegmentsAttr();
+    elementLoop.removeGangOperandsDeviceTypeAttr();
+  }
+  if (inputLoop.hasVector() || inputLoop.getVectorValue()) {
+    elementLoop.removeWorkerAttr();
+    elementLoop.removeWorkerNumOperandsDeviceTypeAttr();
+  }
+  rewriter.finalizeOpModification(elementLoop);
+
+  // Create empty block in elementLoop and add IV argument
+  mlir::Block *blk = rewriter.createBlock(&elementLoop.getRegion(),
+                                          elementLoop.getRegion().begin());
+  rewriter.setInsertionPointToEnd(blk);
+  mlir::acc::YieldOp::create(rewriter, loc);
+  elementLoop.getBody().addArgument(
+      inputLoop.getBody().getArgument(0).getType(), loc);
+
+  return elementLoop;
+}
+
+// Move ops from source to target Loop and replace uses of IVs
+static void moveOpsAndReplaceIVs(mlir::acc::LoopOp sourceLoop,
+                                 mlir::acc::LoopOp targetLoop,
+                                 llvm::ArrayRef<mlir::Value> newIVs,
+                                 llvm::ArrayRef<mlir::Value> origIVs,
+                                 size_t nOps, mlir::RewriterBase &rewriter) {
+  // Move ops from source to target loop [begin, begin + nOps - 1)
+  mlir::Block::iterator begin = sourceLoop.getBody().begin();
+  targetLoop.getBody().getOperations().splice(
+      targetLoop.getBody().getOperations().begin(),
+      sourceLoop.getBody().getOperations(), begin, std::next(begin, nOps - 1));
+
+  // Replace uses of origIV with newIV
+  for (auto [i, newIV] : llvm::enumerate(newIVs))
+    mlir::replaceAllUsesInRegionWith(origIVs[i], newIV, targetLoop.getRegion());
+}
+
+mlir::acc::LoopOp
+mlir::acc::tileACCLoops(llvm::SmallVector<mlir::acc::LoopOp> &tileLoops,
+                        const llvm::SmallVector<mlir::Value> &tileSizes,
+                        int32_t defaultTileSize, mlir::RewriterBase &rewriter) {
+  // Tile collapsed and/or nested loops
+  mlir::acc::LoopOp outerLoop = tileLoops[0];
+  const mlir::Location loc = outerLoop.getLoc();
+
+  // Resolve unknown tile sizes (tile(*) represented as -1)
+  llvm::SmallVector<mlir::Value> resolvedTileSizes;
+  rewriter.setInsertionPoint(outerLoop);
+  for (mlir::Value tileSize : tileSizes) {
+    resolvedTileSizes.push_back(
+        resolveUnknownTileSize(tileSize, defaultTileSize, rewriter, loc));
+  }
+
+  mlir::acc::LoopOp innerLoop = tileLoops[tileLoops.size() - 1];
+  llvm::SmallVector<mlir::Value, 3> origIVs;
+  llvm::SmallVector<mlir::Value, 3> origSteps;
+  llvm::SmallVector<mlir::Value, 3> origUBs;
+  llvm::SmallVector<mlir::Value, 3> newSteps;
+  llvm::SmallVector<mlir::Value, 3> newUBs;
+  llvm::SmallVector<mlir::Value, 3> newIVs;
+  size_t nOps = innerLoop.getBody().getOperations().size();
+
+  // Extract original inclusiveUBs
+  llvm::SmallVector<bool> inclusiveUBs;
+  for (auto tileLoop : tileLoops) {
+    for (auto [j, step] : llvm::enumerate(tileLoop.getStep())) {
+      // inclusiveUBs are present on the IR from Fortran frontend for DO loops
+      // but might not be present from other frontends (python)
+      // So check if it exists
+      if (tileLoop.getInclusiveUpperboundAttr())
+        inclusiveUBs.push_back(
+            tileLoop.getInclusiveUpperboundAttr().asArrayRef()[j]);
+      else
+        inclusiveUBs.push_back(false);
+    }
+  }
+
+  // Extract original ivs, UBs, steps, and calculate new steps
+  rewriter.setInsertionPoint(outerLoop);
+  for (auto [i, tileLoop] : llvm::enumerate(tileLoops)) {
+    for (auto arg : tileLoop.getBody().getArguments())
+      origIVs.push_back(arg);
+    for (auto ub : tileLoop.getUpperbound())
+      origUBs.push_back(ub);
+
+    llvm::SmallVector<mlir::Value, 3> currentLoopSteps;
+    for (auto [j, step] : llvm::enumerate(tileLoop.getStep())) {
+      origSteps.push_back(step);
+      if (i + j >= resolvedTileSizes.size()) {
+        currentLoopSteps.push_back(step);
+      } else {
+        mlir::Value tileSize = resolvedTileSizes[i + j];
+        auto newLoopStep =
+            mlir::arith::MulIOp::create(rewriter, loc, step, tileSize);
+        currentLoopSteps.push_back(newLoopStep);
+        newSteps.push_back(newLoopStep);
+      }
+    }
+
+    rewriter.startOpModification(tileLoop);
+    tileLoop.getStepMutable().clear();
+    tileLoop.getStepMutable().append(currentLoopSteps);
+    rewriter.finalizeOpModification(tileLoop);
+  }
+
+  // Calculate new upper bounds for element loops
+  for (size_t i = 0; i < newSteps.size(); i++) {
+    rewriter.setInsertionPoint(innerLoop.getBody().getTerminator());
+    // UpperBound: min(origUB, origIV+(originalStep*tile_size))
+    auto stepped =
+        mlir::arith::AddIOp::create(rewriter, loc, origIVs[i], newSteps[i]);
+    mlir::Value newUB = stepped;
+    if (inclusiveUBs[i]) {
+      // Handle InclusiveUB
+      // UpperBound: min(origUB, origIV+(originalStep*tile_size - 1))
+      auto c1 = mlir::arith::ConstantOp::create(
+          rewriter, loc, newSteps[i].getType(),
+          rewriter.getIntegerAttr(newSteps[i].getType(), 1));
+      newUB = mlir::arith::SubIOp::create(rewriter, loc, stepped, c1);
+    }
+    newUBs.push_back(
+        mlir::arith::MinSIOp::create(rewriter, loc, origUBs[i], newUB));
+  }
+
+  // Create and insert nested elementLoopOps before terminator of outer loopOp
+  mlir::acc::LoopOp currentLoop = innerLoop;
+  for (size_t i = 0; i < resolvedTileSizes.size(); i++) {
+    rewriter.setInsertionPoint(currentLoop.getBody().getTerminator());
+    mlir::DenseBoolArrayAttr inclusiveUBAttr = mlir::DenseBoolArrayAttr{};
+    if (inclusiveUBs[i])
+      inclusiveUBAttr = rewriter.getDenseBoolArrayAttr({true});
+
+    mlir::acc::LoopOp elementLoop =
+        createInnerLoop(innerLoop, rewriter, mlir::ValueRange{origIVs[i]},
+                        mlir::ValueRange{newUBs[i]},
+                        mlir::ValueRange{origSteps[i]}, inclusiveUBAttr, loc);
+
+    // Remove vector/worker attributes from inner element loops except
+    // outermost element loop
+    if (i > 0) {
+      rewriter.startOpModification(elementLoop);
+      removeWorkerVectorFromLoop(elementLoop);
+      rewriter.finalizeOpModification(elementLoop);
+    }
+    newIVs.push_back(elementLoop.getBody().getArgument(0));
+    currentLoop = elementLoop;
+  }
+
+  // Remove vector/worker attributes from outer tile loops
+  for (auto tileLoop : tileLoops) {
+    rewriter.startOpModification(tileLoop);
+    removeWorkerVectorFromLoop(tileLoop);
+    rewriter.finalizeOpModification(tileLoop);
+  }
+
+  // Move ops from inner tile loop to inner element loop and replace IV uses
+  moveOpsAndReplaceIVs(innerLoop, currentLoop, newIVs, origIVs, nOps, rewriter);
+
+  return outerLoop;
+}
+
+llvm::SmallVector<mlir::acc::LoopOp>
+mlir::acc::uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount,
+                           unsigned collapseCount,
+                           mlir::RewriterBase &rewriter) {
+  llvm::SmallVector<mlir::acc::LoopOp, 3> newLoops;
+  llvm::SmallVector<mlir::Value, 3> newIVs;
+  mlir::Location loc = origLoop.getLoc();
+  llvm::SmallVector<bool> newInclusiveUBs;
+  llvm::SmallVector<mlir::Value, 3> lbs, ubs, steps;
+  for (unsigned i = 0; i < collapseCount; i++) {
+    // inclusiveUpperbound attribute might not be set, default to false
+    bool inclusiveUB = false;
+    if (origLoop.getInclusiveUpperboundAttr())
+      inclusiveUB = origLoop.getInclusiveUpperboundAttr().asArrayRef()[i];
+    newInclusiveUBs.push_back(inclusiveUB);
+    lbs.push_back(origLoop.getLowerbound()[i]);
+    ubs.push_back(origLoop.getUpperbound()[i]);
+    steps.push_back(origLoop.getStep()[i]);
+  }
+  mlir::acc::LoopOp outerLoop = createACCLoopFromOriginal(
+      origLoop, rewriter, lbs, ubs, steps,
+      rewriter.getDenseBoolArrayAttr(newInclusiveUBs),
+      origLoop.getCombinedAttr(), loc, /*preserveCollapse*/ true);
+  mlir::Block *blk = rewriter.createBlock(&outerLoop.getRegion(),
+                                          outerLoop.getRegion().begin());
+  rewriter.setInsertionPointToEnd(blk);
+  mlir::acc::YieldOp::create(rewriter, loc);
+  for (unsigned i = 0; i < collapseCount; i++) {
+    outerLoop.getBody().addArgument(origLoop.getBody().getArgument(i).getType(),
+                                    loc);
+    newIVs.push_back(outerLoop.getBody().getArgument(i));
+  }
+  newLoops.push_back(outerLoop);
+
+  mlir::acc::LoopOp currentLoopOp = outerLoop;
+  for (unsigned i = collapseCount; i < tileCount; i++) {
+    rewriter.setInsertionPoint(currentLoopOp.getBody().getTerminator());
+    bool inclusiveUB = false;
+    if (origLoop.getInclusiveUpperboundAttr())
+      inclusiveUB = origLoop.getInclusiveUpperboundAttr().asArrayRef()[i];
+    mlir::DenseBoolArrayAttr inclusiveUBAttr =
+        rewriter.getDenseBoolArrayAttr({inclusiveUB});
+    mlir::acc::LoopOp innerLoop = createInnerLoop(
+        origLoop, rewriter, mlir::ValueRange{origLoop.getLowerbound()[i]},
+        mlir::ValueRange{origLoop.getUpperbound()[i]},
+        mlir::ValueRange{origLoop.getStep()[i]}, inclusiveUBAttr, loc);
+    newIVs.push_back(innerLoop.getBody().getArgument(0));
+    newLoops.push_back(innerLoop);
+    currentLoopOp = innerLoop;
+  }
+  // Move ops from origLoop to innermost loop and replace uses of IVs
+  size_t nOps = origLoop.getBody().getOperations().size();
+  llvm::SmallVector<mlir::Value, 3> origIVs;
+  for (auto arg : origLoop.getBody().getArguments())
+    origIVs.push_back(arg);
+  moveOpsAndReplaceIVs(origLoop, currentLoopOp, newIVs, origIVs, nOps,
+                       rewriter);
+
+  return newLoops;
+}
diff --git a/mlir/unittests/Dialect/OpenACC/CMakeLists.txt b/mlir/unittests/Dialect/OpenACC/CMakeLists.txt
index c8c2bb96b0539..060c8b8d2679d 100644
--- a/mlir/unittests/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/unittests/Dialect/OpenACC/CMakeLists.txt
@@ -2,6 +2,7 @@ add_mlir_unittest(MLIROpenACCTests
   OpenACCOpsTest.cpp
   OpenACCOpsInterfacesTest.cpp
   OpenACCUtilsTest.cpp
+  OpenACCUtilsTilingTest.cpp
 )
 mlir_target_link_libraries(MLIROpenACCTests
   PRIVATE
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp
new file mode 100644
index 0000000000000..287af9fafd5b7
--- /dev/null
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp
@@ -0,0 +1,349 @@
+//===- OpenACCUtilsTilingTest.cpp - Unit tests for loop tiling utilities --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenACC/OpenACCUtilsTiling.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/OwningOpRef.h"
+#include "gtest/gtest.h"
+
+using namespace mlir;
+using namespace mlir::acc;
+
+//===----------------------------------------------------------------------===//
+// Test Fixture
+//===----------------------------------------------------------------------===//
+
+class OpenACCUtilsTilingTest : public ::testing::Test {
+protected:
+  OpenACCUtilsTilingTest() : b(&context), loc(UnknownLoc::get(&context)) {
+    context.loadDialect<acc::OpenACCDialect, arith::ArithDialect,
+                        memref::MemRefDialect, func::FuncDialect>();
+  }
+
+  // Create a simple LoopOp with specified bounds using the simple builder
+  acc::LoopOp createLoopOp(OpBuilder &builder, ValueRange lbs, ValueRange ubs,
+                           ValueRange steps) {
+    auto loopOp = acc::LoopOp::create(builder, loc, lbs, ubs, steps,
+                                      acc::LoopParMode::loop_independent);
+
+    // Add body block with IV arguments and yield
+    Region &region = loopOp.getRegion();
+    Block *block = builder.createBlock(&region, region.begin());
+    for (Value lb : lbs)
+      block->addArgument(lb.getType(), loc);
+    builder.setInsertionPointToEnd(block);
+    acc::YieldOp::create(builder, loc);
+
+    return loopOp;
+  }
+
+  // Helper to count nested acc.loop ops within a loop
+  unsigned countNestedLoops(acc::LoopOp loop) {
+    unsigned count = 0;
+    loop.getBody().walk([&](acc::LoopOp) { ++count; });
+    return count;
+  }
+
+  // Helper to collect all nested acc.loop ops in order
+  SmallVector<acc::LoopOp> collectNestedLoops(acc::LoopOp loop) {
+    SmallVector<acc::LoopOp> loops;
+    loop.getBody().walk([&](acc::LoopOp nestedLoop) {
+      loops.push_back(nestedLoop);
+    });
+    return loops;
+  }
+
+  MLIRContext context;
+  OpBuilder b;
+  Location loc;
+};
+
+//===----------------------------------------------------------------------===//
+// tileACCLoops Tests
+//===----------------------------------------------------------------------===//
+
+TEST_F(OpenACCUtilsTilingTest, tileACCLoopsSingleLoop) {
+  // Create a module to hold the function
+  OwningOpRef<ModuleOp> module = ModuleOp::create(loc);
+  Block *moduleBlock = module->getBody();
+
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPointToStart(moduleBlock);
+
+  // Create a function
+  auto funcType = b.getFunctionType({}, {});
+  OwningOpRef<func::FuncOp> funcOp =
+      func::FuncOp::create(b, loc, "test_func", funcType);
+  Block *funcBlock = funcOp->addEntryBlock();
+
+  b.setInsertionPointToStart(funcBlock);
+
+  // Create loop bounds
+  Value lb =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0));
+  Value ub =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(100));
+  Value step =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
+  Value tileSize =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(4));
+
+  // Create the loop
+  acc::LoopOp loopOp = createLoopOp(b, {lb}, {ub}, {step});
+
+  // Tile the loop using IRRewriter
+  IRRewriter rewriter(&context);
+  rewriter.setInsertionPoint(loopOp);
+
+  SmallVector<acc::LoopOp> loopsToTile = {loopOp};
+  SmallVector<Value> tileSizes = {tileSize};
+
+  acc::LoopOp tiledLoop = tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter);
+
+  // Verify the tiled loop was created
+  EXPECT_TRUE(tiledLoop != nullptr);
+  EXPECT_FALSE(tiledLoop.getBody().empty());
+
+  // After tiling a single loop with tile(4), we should have:
+  // - 1 tile loop (the outer loop)
+  // - 1 element loop nested inside
+  // Total: 1 nested loop inside the tile loop
+  EXPECT_EQ(countNestedLoops(tiledLoop), 1u);
+
+  // The tile loop (outer) should have 1 IV
+  EXPECT_EQ(tiledLoop.getBody().getNumArguments(), 1u);
+
+  // Collect nested loops and verify
+  auto nestedLoops = collectNestedLoops(tiledLoop);
+  EXPECT_EQ(nestedLoops.size(), 1u);
+  if (!nestedLoops.empty()) {
+    // The element loop should have 1 IV
+    EXPECT_EQ(nestedLoops[0].getBody().getNumArguments(), 1u);
+  }
+}
+
+TEST_F(OpenACCUtilsTilingTest, tileACCLoopsNestedLoops) {
+  // Create a module to hold the function
+  OwningOpRef<ModuleOp> module = ModuleOp::create(loc);
+  Block *moduleBlock = module->getBody();
+
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPointToStart(moduleBlock);
+
+  // Create a function
+  auto funcType = b.getFunctionType({}, {});
+  OwningOpRef<func::FuncOp> funcOp =
+      func::FuncOp::create(b, loc, "test_func", funcType);
+  Block *funcBlock = funcOp->addEntryBlock();
+
+  b.setInsertionPointToStart(funcBlock);
+
+  // Create loop bounds for outer loop
+  Value lb1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0));
+  Value ub1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(100));
+  Value step1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
+
+  // Create loop bounds for inner loop
+  Value lb2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0));
+  Value ub2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(50));
+  Value step2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
+
+  // Tile sizes
+  Value tileSize1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(4));
+  Value tileSize2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(8));
+
+  // Create outer loop
+  acc::LoopOp outerLoop = createLoopOp(b, {lb1}, {ub1}, {step1});
+
+  // Create inner loop inside outer loop
+  b.setInsertionPoint(outerLoop.getBody().getTerminator());
+  acc::LoopOp innerLoop = createLoopOp(b, {lb2}, {ub2}, {step2});
+
+  // Tile the loops
+  IRRewriter rewriter(&context);
+  rewriter.setInsertionPoint(outerLoop);
+
+  SmallVector<acc::LoopOp> loopsToTile = {outerLoop, innerLoop};
+  SmallVector<Value> tileSizes = {tileSize1, tileSize2};
+
+  acc::LoopOp tiledLoop = tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter);
+
+  // Verify the tiled loop nest was created
+  EXPECT_TRUE(tiledLoop != nullptr);
+  EXPECT_FALSE(tiledLoop.getBody().empty());
+
+  // After tiling a 2-level nested loop with tile(4,8), we should have:
+  // tile_loop_1 -> tile_loop_2 -> element_loop_1 -> element_loop_2
+  // Total: 3 nested loops inside the outermost tile loop
+  unsigned nestedCount = countNestedLoops(tiledLoop);
+  EXPECT_EQ(nestedCount, 3u);
+
+  // The outermost tile loop should have 1 IV
+  EXPECT_EQ(tiledLoop.getBody().getNumArguments(), 1u);
+
+  // Collect all nested loops and verify each has 1 IV
+  auto nestedLoops = collectNestedLoops(tiledLoop);
+  EXPECT_EQ(nestedLoops.size(), 3u);
+  for (auto loop : nestedLoops) {
+    EXPECT_EQ(loop.getBody().getNumArguments(), 1u);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// uncollapseLoops Tests
+//===----------------------------------------------------------------------===//
+
+TEST_F(OpenACCUtilsTilingTest, uncollapseLoopsBasic) {
+  // Create a module to hold the function
+  OwningOpRef<ModuleOp> module = ModuleOp::create(loc);
+  Block *moduleBlock = module->getBody();
+
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPointToStart(moduleBlock);
+
+  // Create a function
+  auto funcType = b.getFunctionType({}, {});
+  OwningOpRef<func::FuncOp> funcOp =
+      func::FuncOp::create(b, loc, "test_func", funcType);
+  Block *funcBlock = funcOp->addEntryBlock();
+
+  b.setInsertionPointToStart(funcBlock);
+
+  // Create loop bounds for a collapsed 2-level loop
+  Value lb1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0));
+  Value ub1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(10));
+  Value step1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
+  Value lb2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0));
+  Value ub2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(20));
+  Value step2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
+
+  // Create a collapsed loop with 2 IVs
+  acc::LoopOp collapsedLoop =
+      createLoopOp(b, {lb1, lb2}, {ub1, ub2}, {step1, step2});
+
+  // Set the collapse attribute
+  collapsedLoop.setCollapseForDeviceTypes(&context, {acc::DeviceType::None},
+                                          llvm::APInt(64, 1));
+
+  // Uncollapse the loop: tileCount=2, collapseCount=1
+  IRRewriter rewriter(&context);
+  rewriter.setInsertionPoint(collapsedLoop);
+
+  SmallVector<acc::LoopOp> uncollapsedLoops = uncollapseLoops(
+      collapsedLoop, /*tileCount=*/2, /*collapseCount=*/1, rewriter);
+
+  // Should produce 2 loops (one outer with collapse=1, one inner)
+  EXPECT_EQ(uncollapsedLoops.size(), 2u);
+
+  if (uncollapsedLoops.size() >= 2) {
+    // Verify the outer loop has 1 IV (collapseCount=1)
+    acc::LoopOp outerLoop = uncollapsedLoops[0];
+    EXPECT_EQ(outerLoop.getBody().getNumArguments(), 1u);
+    EXPECT_EQ(outerLoop.getLowerbound().size(), 1u);
+    EXPECT_EQ(outerLoop.getUpperbound().size(), 1u);
+    EXPECT_EQ(outerLoop.getStep().size(), 1u);
+
+    // Verify the inner loop has 1 IV
+    acc::LoopOp innerLoop = uncollapsedLoops[1];
+    EXPECT_EQ(innerLoop.getBody().getNumArguments(), 1u);
+    EXPECT_EQ(innerLoop.getLowerbound().size(), 1u);
+    EXPECT_EQ(innerLoop.getUpperbound().size(), 1u);
+    EXPECT_EQ(innerLoop.getStep().size(), 1u);
+
+    // Verify nesting: inner loop should be inside outer loop
+    unsigned nestedCount = countNestedLoops(outerLoop);
+    EXPECT_EQ(nestedCount, 1u);
+  }
+}
+
+TEST_F(OpenACCUtilsTilingTest, uncollapseLoopsThreeLevels) {
+  // Test uncollapsing with 3 levels: collapse(2) with tile(3)
+  OwningOpRef<ModuleOp> module = ModuleOp::create(loc);
+  Block *moduleBlock = module->getBody();
+
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPointToStart(moduleBlock);
+
+  auto funcType = b.getFunctionType({}, {});
+  OwningOpRef<func::FuncOp> funcOp =
+      func::FuncOp::create(b, loc, "test_func", funcType);
+  Block *funcBlock = funcOp->addEntryBlock();
+
+  b.setInsertionPointToStart(funcBlock);
+
+  // Create 3 sets of bounds
+  Value lb1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0));
+  Value ub1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(10));
+  Value step1 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
+  Value lb2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0));
+  Value ub2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(20));
+  Value step2 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
+  Value lb3 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(0));
+  Value ub3 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(30));
+  Value step3 =
+      arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
+
+  // Create a collapsed loop with 3 IVs
+  acc::LoopOp collapsedLoop = createLoopOp(b, {lb1, lb2, lb3}, {ub1, ub2, ub3},
+                                           {step1, step2, step3});
+
+  // Set collapse(2)
+  collapsedLoop.setCollapseForDeviceTypes(&context, {acc::DeviceType::None},
+                                          llvm::APInt(64, 2));
+
+  // Uncollapse: tileCount=3, collapseCount=2
+  // This should create: outer loop with 2 IVs, then 1 inner loop
+  IRRewriter rewriter(&context);
+  rewriter.setInsertionPoint(collapsedLoop);
+
+  SmallVector<acc::LoopOp> uncollapsedLoops = uncollapseLoops(
+      collapsedLoop, /*tileCount=*/3, /*collapseCount=*/2, rewriter);
+
+  // Should produce 2 loops
+  EXPECT_EQ(uncollapsedLoops.size(), 2u);
+
+  if (uncollapsedLoops.size() >= 2) {
+    // Outer loop should have 2 IVs (from collapse=2)
+    acc::LoopOp outerLoop = uncollapsedLoops[0];
+    EXPECT_EQ(outerLoop.getBody().getNumArguments(), 2u);
+    EXPECT_EQ(outerLoop.getLowerbound().size(), 2u);
+
+    // Inner loop should have 1 IV (the 3rd dimension)
+    acc::LoopOp innerLoop = uncollapsedLoops[1];
+    EXPECT_EQ(innerLoop.getBody().getNumArguments(), 1u);
+    EXPECT_EQ(innerLoop.getLowerbound().size(), 1u);
+  }
+}

>From ec1c2946c4bc73c7ff84120342dc25f4010760ef Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Tue, 9 Dec 2025 11:01:44 -0800
Subject: [PATCH 2/5] Fix braces

---
 mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp
index f939ec1c58cfd..afd1a32e5087b 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsTiling.cpp
@@ -23,11 +23,10 @@ static mlir::Value resolveUnknownTileSize(mlir::Value tileSize,
                                           mlir::RewriterBase &rewriter,
                                           mlir::Location loc) {
   auto constVal = mlir::getConstantIntValue(tileSize);
-  if (constVal && *constVal < 0) {
+  if (constVal && *constVal < 0)
     return mlir::arith::ConstantOp::create(
         rewriter, loc, rewriter.getI32Type(),
         rewriter.getI32IntegerAttr(defaultTileSize));
-  }
   return tileSize;
 }
 

>From 71f5758d6ce0901726b4d60dbace77455f3360aa Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Tue, 9 Dec 2025 11:02:07 -0800
Subject: [PATCH 3/5] Fix API comment

---
 mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h
index 3152526cc0582..6fcb706aa3488 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsTiling.h
@@ -63,7 +63,7 @@ uncollapseLoops(mlir::acc::LoopOp origLoop, unsigned tileCount,
 ///  }
 /// \endcode
 ///
-/// Unknown tile sizes (represented as -1 in OpenACC for `tile(*)`) are
+/// Unknown tile sizes (represented as -1 in acc dialect for `tile(*)`) are
 /// resolved to the provided default tile size.
 ///
 /// \param tileLoops The loops to tile (outermost first).

>From 32c12cb27f38b7013bdc02b13a01906e87746d03 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Tue, 9 Dec 2025 11:02:59 -0800
Subject: [PATCH 4/5] Fix formatting of test

---
 .../Dialect/OpenACC/OpenACCUtilsTilingTest.cpp    | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp
index 287af9fafd5b7..07f2ca67d43bc 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp
@@ -58,9 +58,8 @@ class OpenACCUtilsTilingTest : public ::testing::Test {
   // Helper to collect all nested acc.loop ops in order
   SmallVector<acc::LoopOp> collectNestedLoops(acc::LoopOp loop) {
     SmallVector<acc::LoopOp> loops;
-    loop.getBody().walk([&](acc::LoopOp nestedLoop) {
-      loops.push_back(nestedLoop);
-    });
+    loop.getBody().walk(
+        [&](acc::LoopOp nestedLoop) { loops.push_back(nestedLoop); });
     return loops;
   }
 
@@ -109,7 +108,8 @@ TEST_F(OpenACCUtilsTilingTest, tileACCLoopsSingleLoop) {
   SmallVector<acc::LoopOp> loopsToTile = {loopOp};
   SmallVector<Value> tileSizes = {tileSize};
 
-  acc::LoopOp tiledLoop = tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter);
+  acc::LoopOp tiledLoop =
+      tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter);
 
   // Verify the tiled loop was created
   EXPECT_TRUE(tiledLoop != nullptr);
@@ -185,7 +185,8 @@ TEST_F(OpenACCUtilsTilingTest, tileACCLoopsNestedLoops) {
   SmallVector<acc::LoopOp> loopsToTile = {outerLoop, innerLoop};
   SmallVector<Value> tileSizes = {tileSize1, tileSize2};
 
-  acc::LoopOp tiledLoop = tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter);
+  acc::LoopOp tiledLoop =
+      tileACCLoops(loopsToTile, tileSizes, /*defaultTileSize=*/128, rewriter);
 
   // Verify the tiled loop nest was created
   EXPECT_TRUE(tiledLoop != nullptr);
@@ -317,8 +318,8 @@ TEST_F(OpenACCUtilsTilingTest, uncollapseLoopsThreeLevels) {
       arith::ConstantOp::create(b, loc, b.getIndexType(), b.getIndexAttr(1));
 
   // Create a collapsed loop with 3 IVs
-  acc::LoopOp collapsedLoop = createLoopOp(b, {lb1, lb2, lb3}, {ub1, ub2, ub3},
-                                           {step1, step2, step3});
+  acc::LoopOp collapsedLoop =
+      createLoopOp(b, {lb1, lb2, lb3}, {ub1, ub2, ub3}, {step1, step2, step3});
 
   // Set collapse(2)
   collapsedLoop.setCollapseForDeviceTypes(&context, {acc::DeviceType::None},

>From a408794bdaaa2cb7cfd657040338d956325c3252 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Tue, 9 Dec 2025 11:04:36 -0800
Subject: [PATCH 5/5] Fix brace issues in test

---
 mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp
index 07f2ca67d43bc..95bc1eab7d3fe 100644
--- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsTilingTest.cpp
@@ -127,10 +127,9 @@ TEST_F(OpenACCUtilsTilingTest, tileACCLoopsSingleLoop) {
   // Collect nested loops and verify
   auto nestedLoops = collectNestedLoops(tiledLoop);
   EXPECT_EQ(nestedLoops.size(), 1u);
-  if (!nestedLoops.empty()) {
-    // The element loop should have 1 IV
+  // The element loop should have 1 IV
+  if (!nestedLoops.empty())
     EXPECT_EQ(nestedLoops[0].getBody().getNumArguments(), 1u);
-  }
 }
 
 TEST_F(OpenACCUtilsTilingTest, tileACCLoopsNestedLoops) {
@@ -204,9 +203,8 @@ TEST_F(OpenACCUtilsTilingTest, tileACCLoopsNestedLoops) {
   // Collect all nested loops and verify each has 1 IV
   auto nestedLoops = collectNestedLoops(tiledLoop);
   EXPECT_EQ(nestedLoops.size(), 3u);
-  for (auto loop : nestedLoops) {
+  for (auto loop : nestedLoops)
     EXPECT_EQ(loop.getBody().getNumArguments(), 1u);
-  }
 }
 
 //===----------------------------------------------------------------------===//



More information about the Mlir-commits mailing list