[Mlir-commits] [mlir] [mlir][acc] Add utilities for converting acc.loop to scf (PR #172953)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Dec 18 21:24:00 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-openacc

@llvm/pr-subscribers-mlir

Author: Razvan Lupusoru (razvanlupusoru)

<details>
<summary>Changes</summary>

Add OpenACCUtilsLoop.h/.cpp with utilities for converting acc.loop operations to SCF dialect operations:

- convertACCLoopToSCFFor: Convert structured acc.loop to scf.for with loop collapsing support
- convertACCLoopToSCFParallel: Convert acc.loop to scf.parallel
- convertUnstructuredACCLoopToSCFExecuteRegion: Convert unstructured acc.loop (multi-block) to scf.execute_region

Key features:
- Automatic type conversion between integer types and index
- Inclusive-to-exclusive upper bound conversion
- Trip count calculation with clamping for negative counts
- Constant folding via createOrFold for cleaner IR
- Assertions to prevent misuse (e.g., builder inside loop region)
- Error emission for unsupported cases (loops with results)

Comprehensive unit tests covering these APIs are also added.

---

Patch is 38.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172953.diff


5 Files Affected:

- (added) mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsLoop.h (+54) 
- (modified) mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt (+4-1) 
- (added) mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp (+323) 
- (modified) mlir/unittests/Dialect/OpenACC/CMakeLists.txt (+4) 
- (added) mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp (+597) 


``````````diff
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsLoop.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsLoop.h
new file mode 100644
index 0000000000000..d2e7174fd306a
--- /dev/null
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsLoop.h
@@ -0,0 +1,54 @@
+//===- OpenACCUtilsLoop.h - OpenACC Loop Utilities --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for converting OpenACC loop operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_OPENACC_OPENACCUTILSLOOP_H_
+#define MLIR_DIALECT_OPENACC_OPENACCUTILSLOOP_H_
+
+namespace mlir {
+class OpBuilder;
+namespace scf {
+class ForOp;
+class ParallelOp;
+class ExecuteRegionOp;
+} // namespace scf
+namespace acc {
+class LoopOp;
+
+/// Convert a structured acc.loop to scf.for.
+/// One scf.for is created per induction variable of the acc.loop, nested in
+/// induction-variable order, and the acc.loop body is cloned into the innermost
+/// one. The loop arguments are converted to index type. If enableCollapse is
+/// true, nested loops are collapsed into a single loop.
+/// The new operations are created immediately before loopOp; loopOp itself is
+/// not erased by this function.
+/// @param loopOp The acc.loop operation to convert (must not be unstructured)
+/// @param enableCollapse Whether to collapse nested loops into one
+/// @return The created (outermost) scf.for operation or nullptr on creation
+///         error. An InFlightDiagnostic is emitted on creation error.
+scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, bool enableCollapse);
+
+/// Convert acc.loop to scf.parallel.
+/// The resulting scf.parallel is normalized: every dimension runs from 0 with
+/// step 1 up to the computed trip count, and uses of the induction variables
+/// inside the body are rewritten to `iv * step + lowerbound`.
+/// The loop induction variables are converted to index types.
+/// loopOp itself is not erased by this function.
+/// @param loopOp The acc.loop operation to convert (must not be unstructured)
+/// @param builder OpBuilder for creating operations. Its insertion point must
+///        be set and must not be inside loopOp. On success the insertion point
+///        is left inside the body of the created scf.parallel.
+/// @return The created scf.parallel operation or nullptr on creation error.
+///         An InFlightDiagnostic is emitted on creation error.
+scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, OpBuilder &builder);
+
+/// Convert an unstructured acc.loop to scf.execute_region.
+/// The region of the acc.loop is cloned into a result-less scf.execute_region
+/// and the ACC terminator of its last block is replaced by scf.yield.
+/// loopOp itself is not erased by this function.
+/// @param loopOp The acc.loop operation to convert (must be unstructured)
+/// @param builder OpBuilder for creating operations. Its insertion point must
+///        be set and must not be inside loopOp. On success the insertion point
+///        is left at the end of the last block of the created region.
+/// @return The created scf.execute_region operation or nullptr on creation
+///         error. An InFlightDiagnostic is emitted on creation error.
+scf::ExecuteRegionOp
+convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, OpBuilder &builder);
+
+} // namespace acc
+} // namespace mlir
+
+#endif // MLIR_DIALECT_OPENACC_OPENACCUTILSLOOP_H_
diff --git a/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt
index c7c322be70d09..532ba90355b44 100644
--- a/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/Utils/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_mlir_dialect_library(MLIROpenACCUtils
-  OpenACCUtilsTiling.cpp
   OpenACCUtils.cpp
+  OpenACCUtilsLoop.cpp
+  OpenACCUtilsTiling.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/OpenACC
@@ -19,6 +20,8 @@ add_mlir_dialect_library(MLIROpenACCUtils
   MLIRArithUtils
   MLIROpenACCDialect
   MLIRIR
+  MLIRSCFDialect
+  MLIRSCFUtils
   MLIRSupport
   MLIRTransformUtils
 )
diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
new file mode 100644
index 0000000000000..fe0707320cc79
--- /dev/null
+++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsLoop.cpp
@@ -0,0 +1,323 @@
+//===- OpenACCUtilsLoop.cpp - OpenACC Loop Utilities ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for converting OpenACC loops to SCF.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenACC/OpenACCUtilsLoop.h"
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/SCF/Utils/Utils.h"
+#include "mlir/IR/IRMapping.h"
+
+using namespace mlir;
+
+namespace {
+
+/// Emit IR computing the iteration count of a loop as an index value:
+///   max(0, (ub - lb + step) / step)
+/// where `ub` is interpreted as an inclusive bound. When inclusiveUpperbound
+/// is false, `ub` is first decremented by one to obtain the inclusive bound.
+/// All three operands are cast to index before any arithmetic is emitted.
+/// A negative quotient (e.g. lb > ub with a positive step, or lb < ub with a
+/// negative step) is clamped to zero.
+static Value calculateTripCount(OpBuilder &b, Location loc, Value lb, Value ub,
+                                Value step, bool inclusiveUpperbound) {
+  Type indexTy = b.getIndexType();
+  Value lower = getValueOrCreateCastToIndexLike(b, loc, indexTy, lb);
+  Value upper = getValueOrCreateCastToIndexLike(b, loc, indexTy, ub);
+  Value stride = getValueOrCreateCastToIndexLike(b, loc, indexTy, step);
+
+  // Turn an exclusive upper bound into an inclusive one.
+  if (!inclusiveUpperbound) {
+    Value one = arith::ConstantIndexOp::create(b, loc, 1);
+    upper = b.createOrFold<arith::SubIOp>(loc, upper, one);
+  }
+
+  // (ub - lb + step) / step, using signed division.
+  Value span = b.createOrFold<arith::SubIOp>(loc, upper, lower);
+  Value biasedSpan = b.createOrFold<arith::AddIOp>(loc, span, stride);
+  Value rawCount = b.createOrFold<arith::DivSIOp>(loc, biasedSpan, stride);
+
+  // select(rawCount < 0, 0, rawCount)
+  Value zero = arith::ConstantIndexOp::create(b, loc, 0);
+  Value countIsNegative = b.createOrFold<arith::CmpIOp>(
+      loc, arith::CmpIPredicate::slt, rawCount, zero);
+  return b.createOrFold<arith::SelectOp>(loc, countIsNegative, zero, rawCount);
+}
+
+/// Return the upper bound of induction variable `ivPos` of `loopOp` as an
+/// exclusive bound. If the acc.loop marks that bound as inclusive, `ub + 1` is
+/// emitted with `b` (in the bound's own type); otherwise the original operand
+/// is returned unchanged and no IR is created.
+static Value getExclusiveUpperBound(acc::LoopOp loopOp, size_t ivPos,
+                                    OpBuilder &b) {
+  Value upperBound = loopOp.getUpperbound()[ivPos];
+
+  // A missing attribute means every bound is exclusive.
+  const bool inclusive =
+      loopOp.getInclusiveUpperbound().has_value() &&
+      loopOp.getInclusiveUpperboundAttr().asArrayRef()[ivPos];
+  if (!inclusive)
+    return upperBound;
+
+  Location loc = upperBound.getLoc();
+  Type boundType = upperBound.getType();
+  // The constant must have the same type as the bound it is added to.
+  Value one;
+  if (boundType.isIndex())
+    one = arith::ConstantIndexOp::create(b, loc, 1);
+  else
+    one = arith::ConstantIntOp::create(b, loc, boundType, 1);
+  return b.createOrFold<arith::AddIOp>(loc, upperBound, one);
+}
+
+/// Handle differing types between SCF (index) and ACC loops.
+/// For every induction variable of `accLoop`, casts the corresponding new SCF
+/// IV to the original ACC IV type (creating the cast with `b` when the types
+/// differ) and records `original IV -> replacement` in `mapping`, overwriting
+/// any earlier entry for that IV.
+/// `newIVs` must correspond 1:1 with the ACC loop's IVs; this is checked with
+/// llvm::zip_equal so that a length mismatch asserts instead of silently
+/// leaving some IVs unmapped.
+static void mapACCLoopIVsToSCFIVs(acc::LoopOp accLoop, ValueRange newIVs,
+                                  OpBuilder &b, IRMapping &mapping) {
+  for (auto [origIV, newIV] :
+       llvm::zip_equal(accLoop.getBody().getArguments(), newIVs)) {
+    Value replacementIV = getValueOrCreateCastToIndexLike(
+        b, accLoop->getLoc(), origIV.getType(), newIV);
+    mapping.map(origIV, replacementIV);
+  }
+}
+
+/// Rewrite the uses of a normalized induction variable.
+/// The loop owning `iv` runs in normalized form (lb = 0, step = 1), so the
+/// value the original loop body expects has to be recovered (denormalized):
+///   original_iv = iv * origStep + origLB
+/// The arithmetic is emitted at the insertion point of `b`, and every use of
+/// `iv` other than that arithmetic itself is redirected to the result.
+static void normalizeIVUses(OpBuilder &b, Location loc, Value iv, Value origLB,
+                            Value origStep) {
+  Type indexTy = b.getIndexType();
+  Value lowerBound = getValueOrCreateCastToIndexLike(b, loc, indexTy, origLB);
+  Value stride = getValueOrCreateCastToIndexLike(b, loc, indexTy, origStep);
+
+  // These are created unfolded: the defining operations are needed below to
+  // exclude them from the use replacement.
+  Value product = arith::MulIOp::create(b, loc, iv, stride);
+  Value originalIV = arith::AddIOp::create(b, loc, product, lowerBound);
+
+  // The muli/addi computing the replacement must keep reading the raw IV.
+  llvm::SmallPtrSet<Operation *, 2> keepUsingRawIV;
+  keepUsingRawIV.insert(product.getDefiningOp());
+  keepUsingRawIV.insert(originalIV.getDefiningOp());
+  iv.replaceAllUsesExcept(originalIV, keepUsingRawIV);
+}
+
+/// Clone a single-block ACC region into `dest` at `insertionPoint`, dropping
+/// the ACC terminator (acc.yield or acc.terminator) of the cloned block.
+///
+/// `dest` is split at `insertionPoint`, the region is cloned in between the
+/// two halves, and all three blocks are merged back into `dest`. Block
+/// arguments of `src` that already have an entry in `mapping` are replaced by
+/// the mapped values during cloning.
+///
+/// @return An iterator into `dest` designating the last cloned operation. If
+///         the region contained nothing but its terminator, the iterator
+///         designates the operation that followed `insertionPoint` instead, or
+///         `dest->end()` if there is none. The iterator is always valid for
+///         `dest`; it never refers to one of the temporary blocks.
+static Block::iterator cloneACCRegionInto(Region *src, Block *dest,
+                                          Block::iterator insertionPoint,
+                                          IRMapping &mapping) {
+  assert(src->hasOneBlock() && "expected single-block region");
+
+  Region *insertRegion = dest->getParent();
+  Block *postInsertBlock = dest->splitBlock(insertionPoint);
+  src->cloneInto(insertRegion, postInsertBlock->getIterator(), mapping);
+
+  auto lastNewBlock = std::prev(postInsertBlock->getIterator());
+  Operation *terminator = lastNewBlock->getTerminator();
+  if (!isa<acc::YieldOp, acc::TerminatorOp>(terminator))
+    llvm_unreachable("unexpected terminator in ACC region");
+
+  // Remember the neighbours as operations rather than as iterators: the
+  // operations survive the block merges below, whereas an iterator obtained by
+  // stepping back from the terminator would refer to the sentinel of a block
+  // that is about to be erased when the terminator is the only operation.
+  Operation *lastCloned = terminator->getPrevNode();
+  Operation *firstTrailing =
+      postInsertBlock->empty() ? nullptr : &postInsertBlock->front();
+  terminator->erase();
+
+  // Merge last block with the postInsertBlock
+  lastNewBlock->getOperations().splice(lastNewBlock->end(),
+                                       postInsertBlock->getOperations());
+  postInsertBlock->erase();
+
+  // Merge first block with original dest block
+  auto firstNewBlock = std::next(dest->getIterator());
+  dest->getOperations().splice(dest->end(), firstNewBlock->getOperations());
+  firstNewBlock->erase();
+
+  if (lastCloned)
+    return lastCloned->getIterator();
+  return firstTrailing ? firstTrailing->getIterator() : dest->end();
+}
+
+/// Wrap a multi-block region with scf.execute_region.
+///
+/// Creates a result-less scf.execute_region at the insertion point of `b`,
+/// clones `region` into it (honouring `mapping`) and replaces the ACC
+/// terminator of the last cloned block with scf.yield.
+///
+/// On success the insertion point of `b` is left at the end of the last block
+/// of the new region. On failure (the terminator yields values) an error is
+/// emitted on the parent operation of `region`, the partially built
+/// scf.execute_region is erased again so no invalid IR is left behind, and
+/// nullptr is returned.
+///
+/// NOTE(review): only the terminator of the last block is rewritten; ACC
+/// terminators in other blocks of the region, if any, are left untouched.
+static scf::ExecuteRegionOp
+wrapMultiBlockRegionWithSCFExecuteRegion(Region &region, IRMapping &mapping,
+                                         Location loc, OpBuilder &b) {
+  auto exeRegionOp = scf::ExecuteRegionOp::create(b, loc, TypeRange{});
+
+  b.cloneRegionBefore(region, exeRegionOp.getRegion(),
+                      exeRegionOp.getRegion().end(), mapping);
+
+  // Find and replace the ACC terminator with scf.yield
+  Block &lastBlock = exeRegionOp.getRegion().back();
+  Operation *terminator = lastBlock.getTerminator();
+  if (!isa<acc::YieldOp, acc::TerminatorOp>(terminator))
+    llvm_unreachable("unexpected terminator in ACC region");
+
+  // Only acc.yield can carry operands; yielded values would require results
+  // on the scf.execute_region, which is not implemented.
+  if (terminator->getNumOperands() > 0) {
+    region.getParentOp()->emitError(
+        "acc.loop with results not yet supported");
+    exeRegionOp.erase();
+    return nullptr;
+  }
+  terminator->erase();
+
+  b.setInsertionPointToEnd(&lastBlock);
+  scf::YieldOp::create(b, loc);
+  return exeRegionOp;
+}
+
+} // namespace
+
+namespace mlir {
+namespace acc {
+
+/// Convert a structured acc.loop into a nest of scf.for operations (one per
+/// induction variable) created immediately before `loopOp`, clone the loop
+/// body into the innermost scf.for, and optionally coalesce the nest.
+/// `loopOp` itself is left in place.
+///
+/// Returns the outermost scf.for. Returns nullptr after emitting an error on
+/// `loopOp` when the loop has results, has no induction variables, or when
+/// collapsing was requested and failed. In the last case the uncollapsed nest
+/// is left in the IR.
+scf::ForOp convertACCLoopToSCFFor(LoopOp loopOp, bool enableCollapse) {
+  assert(!loopOp.getUnstructured() &&
+         "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
+         "loops");
+
+  // Removing the acc.yield below would silently drop any yielded values.
+  if (loopOp->getNumResults() > 0) {
+    loopOp.emitError("acc.loop with results not yet supported");
+    return nullptr;
+  }
+
+  // Without induction variables there is no scf.for to create.
+  if (loopOp.getBody().getNumArguments() == 0) {
+    loopOp.emitError("acc.loop without induction variables cannot be "
+                     "converted to scf.for");
+    return nullptr;
+  }
+
+  Location loc = loopOp->getLoc();
+
+  // `boundsBuilder` emits the bound/step conversions. It stays in front of the
+  // outermost scf.for so that the bounds of every loop in the nest are
+  // available before the nest. `nestBuilder` descends into each new loop.
+  OpBuilder boundsBuilder(loopOp);
+  OpBuilder nestBuilder(loopOp);
+  Type indexType = boundsBuilder.getIndexType();
+
+  // Create nested scf.for loops, one per acc.loop induction variable.
+  SmallVector<scf::ForOp, 4> forOps;
+  SmallVector<Value, 4> scfIVs;
+  for (BlockArgument iv : loopOp.getBody().getArguments()) {
+    size_t idx = iv.getArgNumber();
+
+    Value lowerBound = getValueOrCreateCastToIndexLike(
+        boundsBuilder, loc, indexType, loopOp.getLowerbound()[idx]);
+    Value upperBound = getValueOrCreateCastToIndexLike(
+        boundsBuilder, loc, indexType,
+        getExclusiveUpperBound(loopOp, idx, boundsBuilder));
+    Value step = getValueOrCreateCastToIndexLike(boundsBuilder, loc, indexType,
+                                                 loopOp.getStep()[idx]);
+
+    scf::ForOp forOp =
+        scf::ForOp::create(nestBuilder, loc, lowerBound, upperBound, step);
+    forOps.push_back(forOp);
+    scfIVs.push_back(forOp.getInductionVar());
+
+    // The "outside" builder stays before the outer loop
+    if (idx == 0)
+      boundsBuilder.setInsertionPoint(forOp);
+
+    // The "inside" builder moves into each new loop
+    nestBuilder.setInsertionPointToStart(forOp.getBody());
+  }
+
+  // Map every ACC IV to the matching SCF IV, converted from index back to the
+  // original IV type where necessary.
+  IRMapping mapping;
+  mapACCLoopIVsToSCFIVs(loopOp, scfIVs, nestBuilder, mapping);
+
+  // Clone the loop body into the innermost scf.for
+  cloneACCRegionInto(&loopOp.getRegion(), forOps.back().getBody(),
+                     nestBuilder.getInsertionPoint(), mapping);
+
+  // Optionally collapse nested loops
+  if (enableCollapse && forOps.size() > 1 && failed(coalesceLoops(forOps))) {
+    loopOp.emitError("failed to collapse acc.loop");
+    return nullptr;
+  }
+
+  return forOps.front();
+}
+
+/// Convert a structured acc.loop into a normalized scf.parallel created at the
+/// insertion point of `b` (which must not be inside `loopOp`). Every dimension
+/// iterates from 0 with step 1 up to the computed trip count; inside the body
+/// the original induction variable values are recovered as
+/// `iv * step + lowerbound`. A multi-block body is wrapped in an
+/// scf.execute_region. `loopOp` itself is left in place.
+///
+/// On success the insertion point of `b` is left inside the body of the new
+/// scf.parallel. On failure an error is emitted, nullptr is returned, and `b`
+/// is left with a valid insertion point outside of any erased operation.
+scf::ParallelOp convertACCLoopToSCFParallel(LoopOp loopOp, OpBuilder &b) {
+  assert(!loopOp.getUnstructured() &&
+         "use convertUnstructuredACCLoopToSCFExecuteRegion for unstructured "
+         "loops");
+  assert(b.getInsertionBlock() &&
+         !loopOp->isProperAncestor(b.getInsertionBlock()->getParentOp()) &&
+         "builder insertion point must not be inside the loop being converted");
+
+  // Removing the acc.yield of a single-block body would silently drop any
+  // yielded values; reject such loops before creating any IR.
+  if (loopOp->getNumResults() > 0) {
+    loopOp.emitError("acc.loop with results not yet supported");
+    return nullptr;
+  }
+
+  Location loc = loopOp->getLoc();
+  const size_t numIVs = loopOp.getBody().getNumArguments();
+
+  // Normalize all loops: lb=0, step=1, ub=tripCount
+  Value lb = arith::ConstantIndexOp::create(b, loc, 0);
+  Value step = arith::ConstantIndexOp::create(b, loc, 1);
+  SmallVector<Value, 4> lowerBounds(numIVs, lb);
+  SmallVector<Value, 4> steps(numIVs, step);
+  SmallVector<Value, 4> upperBounds;
+  upperBounds.reserve(numIVs);
+
+  for (size_t idx = 0; idx < numIVs; ++idx) {
+    bool inclusiveUpperbound = false;
+    if (loopOp.getInclusiveUpperbound().has_value())
+      inclusiveUpperbound = loopOp.getInclusiveUpperbound().value()[idx];
+
+    upperBounds.push_back(calculateTripCount(
+        b, loc, loopOp.getLowerbound()[idx], loopOp.getUpperbound()[idx],
+        loopOp.getStep()[idx], inclusiveUpperbound));
+  }
+
+  auto parallelOp =
+      scf::ParallelOp::create(b, loc, lowerBounds, upperBounds, steps);
+
+  // Create IV type conversions
+  IRMapping mapping;
+  b.setInsertionPointToStart(parallelOp.getBody());
+  mapACCLoopIVsToSCFIVs(loopOp, parallelOp.getInductionVars(), b, mapping);
+
+  if (!loopOp.getRegion().hasOneBlock()) {
+    auto exeRegion = wrapMultiBlockRegionWithSCFExecuteRegion(
+        loopOp.getRegion(), mapping, loc, b);
+    if (!exeRegion) {
+      // Move the builder out of the operation before deleting it so that the
+      // caller is not left with a dangling insertion point.
+      b.setInsertionPointAfter(parallelOp);
+      parallelOp.erase();
+      return nullptr;
+    }
+  } else {
+    cloneACCRegionInto(&loopOp.getRegion(), parallelOp.getBody(),
+                       b.getInsertionPoint(), mapping);
+  }
+
+  // Denormalize IV uses. The arithmetic is emitted at the very start of the
+  // body so that it precedes every user of the induction variables.
+  b.setInsertionPointToStart(parallelOp.getBody());
+  for (auto [idx, iv] : llvm::enumerate(parallelOp.getBody()->getArguments()))
+    if (!iv.use_empty())
+      normalizeIVUses(b, loc, iv, loopOp.getLowerbound()[idx],
+                      loopOp.getStep()[idx]);
+
+  return parallelOp;
+}
+
+/// Convert an unstructured acc.loop by cloning its region into a new
+/// scf.execute_region created at the insertion point of `b`, which must not be
+/// inside `loopOp`. Returns nullptr if the wrapping helper reports an error.
+scf::ExecuteRegionOp
+convertUnstructuredACCLoopToSCFExecuteRegion(LoopOp loopOp, OpBuilder &b) {
+  assert(loopOp.getUnstructured() &&
+         "use convertACCLoopToSCFFor for structured loops");
+  assert(b.getInsertionBlock() &&
+         !loopOp->isProperAncestor(b.getInsertionBlock()->getParentOp()) &&
+         "builder insertion point must not be inside the loop being converted");
+
+  Region &loopRegion = loopOp.getRegion();
+  Location loc = loopOp->getLoc();
+
+  // No values are remapped up front; the mapping is only filled by the clone.
+  IRMapping cloneMapping;
+  return wrapMultiBlockRegionWithSCFExecuteRegion(loopRegion, cloneMapping, loc,
+                                                  b);
+}
+
+} // namespace acc
+} // namespace mlir
diff --git a/mlir/unittests/Dialect/OpenACC/CMakeLists.txt b/mlir/unittests/Dialect/OpenACC/CMakeLists.txt
index 060c8b8d2679d..29448d2af5537 100644
--- a/mlir/unittests/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/unittests/Dialect/OpenACC/CMakeLists.txt
@@ -3,13 +3,17 @@ add_mlir_unittest(MLIROpenACCTests
   OpenACCOpsInterfacesTest.cpp
   OpenACCUtilsTest.cpp
   OpenACCUtilsTilingTest.cpp
+  OpenACCUtilsLoopTest.cpp
 )
 mlir_target_link_libraries(MLIROpenACCTests
   PRIVATE
   MLIRIR
+  MLIRAffineDialect
   MLIRFuncDialect
   MLIRMemRefDialect
   MLIRArithDialect
   MLIROpenACCDialect
   MLIROpenACCUtils
+  MLIRSCFDialect
+  MLIRControlFlowDialect
 )
diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
new file mode 100644
index 0000000000000..e23ff2049ca37
--- /dev/null
+++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsLoopTest.cpp
@@ -0,0 +1,597 @@
+//===- OpenACCUtilsLoopTest.cpp - Unit tests for OpenACC loop utilities --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenACC/OpenACCUtilsLoop.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/OwningOpRef.h"
+#include "mlir/IR/Value.h"
+#include "gtest/gtest.h"
+
+using namespace mlir;
+using namespace mlir::acc;
+
+//===----------------------------------------------------------------------===//
+// Test Fixture
+//===----------------------------------------------------------------------===//
+
+class OpenACCUtilsLoopTest : public ::testing::Test {
+protected:
+  OpenACCUtilsLoopTest() : b(&context), loc(UnknownLoc::get(&context)) {
+    context.loadDialect<acc::OpenACCDialect, affine::AffineDialect,
+                        arith::ArithDialect, memref::MemRefDialect,
+                        func::FuncDialect, scf::SCFDialect,
+                        cf::ControlFlowDialect>();
+  }
+
+  /// Helper to create an index constant
+  Value createIndexConstant(int64_t value) {
+    return arith::ConstantOp::create(b, loc, b.getIndexType(),
+                                     b.getIndexAttr(value));
+  }
+
+  /// Helper to create an i32 constant
+  Value createI32Constant(int32_t value) {
+    return arith::ConstantOp::create(b, loc, b.getI32Type(),
+                                     b.getI32IntegerAttr(value));
+  }
+
+  /// Helper to create a simple acc.loop with the given bounds.
+  /// Preserves the builder's insertion point.
+  acc::LoopOp createLoopOp(ValueRange lbs, ValueRange ubs, ValueRange steps,
+                           bool inclusiveUpperbound = true) {
+    OpBuilder::InsertionGuard guard(b);
+
+    auto loopOp = acc::LoopOp::create(b, loc, lbs, ubs, steps,
+                                      acc::LoopParMode::loop_independent);
+
+    // Set inclusive upper bound attribute
+    SmallVector<bool> inclusiveFlags(lbs.size(), inclusiveUpperbound);
+    loopOp.setInclusiveUpperboundAttr(b.getDenseBoolArrayAttr(inclusiveFlags));
+
+    // Add body block with IV arguments and yield
+    Region &region = loopOp.getRegion();
+    Block *block = b.createBlock(&region, region.begin());
+    for (Value lb : lbs)
+      block->addArgument(lb.getType(), loc);
+    b.setInsertionPointToEnd(block);
+    acc::YieldOp::create(b, loc);
+
+    return loopOp;
+  }
+
+  /// Helper to create an unstructured acc.loop with multiple blocks and ops.
+  /// Preserves the builder's insertion point.
+  acc::LoopOp createUnstructuredLoopOp(ValueRange lbs, ValueRange ubs,
+                                       ValueRange steps) {
+    OpBuilder::InsertionGuard guard(b);
+
+    auto ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/172953


More information about the Mlir-commits mailing list