[Mlir-commits] [mlir] [mlir][OpenMP] Support pure taskloop-local loop bounds (PR #190992)

Tom Eccles llvmlistbot at llvm.org
Mon Apr 13 06:21:37 PDT 2026


https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/190992

>From 5ca274d43db57f987e392ccc32e4a9004874e361 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 8 Apr 2026 16:16:53 +0100
Subject: [PATCH 1/2] [mlir][OpenMP] Support pure taskloop-local loop bounds

Follow up to https://github.com/llvm/llvm-project/pull/190827

Assisted-by: codex
---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |   4 +-
 .../mlir/Target/LLVMIR/ModuleTranslation.h    |  15 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  33 ++-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 254 ++++++++++++------
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |   7 +-
 mlir/test/Dialect/OpenMP/invalid.mlir         |  14 +-
 mlir/test/Dialect/OpenMP/ops.mlir             |  19 ++
 .../LLVMIR/openmp-taskloop-local-bounds.mlir  |  65 +++++
 8 files changed, 310 insertions(+), 101 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-local-bounds.mlir

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 40ec8904a136f..669dd3cd1544a 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -1017,7 +1017,9 @@ def TaskloopContextOp : OpenMP_Op<"taskloop.context", traits = [
     ```
 
     The wrapped `omp.loop_nest` lower bounds, upper bounds and steps must be
-    defined outside of the `omp.taskloop.context` region.
+    either defined outside of the `omp.taskloop.context` region or produced by
+    pure, regionless operations inside the region that do not depend on block
+    arguments.
 
     For definitions of "undeferred task", "included task", "final task" and
     "mergeable task", please check OpenMP Specification.
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index c67bb57985bd0..443f7c9fc2cb2 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -202,6 +202,9 @@ class ModuleTranslation {
   /// in these blocks.
   void forgetMapping(Region &region);
 
+  /// Removes the entry for `value` from the MLIR-to-LLVM value mapping.
+  void forgetMapping(Value value) { valueMapping.erase(value); }
+
   /// Returns the LLVM metadata corresponding to a mlir LLVM dialect alias scope
   /// attribute. Creates the metadata node if it has not been converted before.
   llvm::MDNode *getOrCreateAliasScope(AliasScopeAttr aliasScopeAttr);
@@ -308,6 +311,13 @@ class ModuleTranslation {
                             /*recordInsertions=*/false);
   }
 
+  /// Converts the given MLIR operation into LLVM IR using `builder`, with
+  /// insertion recording disabled (`recordInsertions` is false). It is up to
+  /// the caller to ensure that all operands have been mapped before the call.
+  LogicalResult convertOperation(Operation &op, llvm::IRBuilderBase &builder) {
+    return convertOperationImpl(op, builder, /*recordInsertions=*/false);
+  }
+
   /// Converts argument and result attributes from `attrsOp` to LLVM IR
   /// attributes on the `call` instruction. Returns failure if conversion fails.
   /// The `immArgPositions` parameter is only relevant for intrinsics. It
@@ -381,8 +391,9 @@ class ModuleTranslation {
   ~ModuleTranslation();
 
   /// Converts individual components.
-  LogicalResult convertOperation(Operation &op, llvm::IRBuilderBase &builder,
-                                 bool recordInsertions = false);
+  LogicalResult convertOperationImpl(Operation &op,
+                                     llvm::IRBuilderBase &builder,
+                                     bool recordInsertions = false);
   LogicalResult convertFunctionSignatures();
   LogicalResult convertFunctions();
   LogicalResult convertIFuncs();
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index df74a316c7b66..20328375e8e9f 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -23,6 +23,7 @@
 #include "mlir/IR/OperationSupport.h"
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/Interfaces/FoldInterfaces.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -3437,19 +3438,31 @@ LogicalResult TaskloopContextOp::verifyRegions() {
   if (!loopNestOp)
     return failure();
 
-  auto isDefinedInTaskloopContext = [&](Value value) {
-    // A region is considered an ancestor of itself
-    return region.isAncestor(value.getParentRegion());
+  std::function<bool(Value)> isValidBoundValue = [&](Value value) -> bool {
+    Region *valueRegion = value.getParentRegion();
+    // A loop bound value defined outside of the taskloop context region is
+    // valid. A region is considered an ancestor of itself.
+    if (!region.isAncestor(valueRegion))
+      return true;
+
+    Operation *defOp = value.getDefiningOp();
+    if (!defOp || defOp->getNumRegions() != 0 || !isPure(defOp))
+      return false;
+
+    return llvm::all_of(defOp->getOperands(), isValidBoundValue);
   };
-  auto hasTaskloopLocalBound = [&](OperandRange range) {
-    return llvm::any_of(range, isDefinedInTaskloopContext);
+  auto hasUnsupportedTaskloopLocalBound = [&](OperandRange range) {
+    return llvm::any_of(range,
+                        [&](Value value) { return !isValidBoundValue(value); });
   };
 
-  if (hasTaskloopLocalBound(loopNestOp.getLoopLowerBounds()) ||
-      hasTaskloopLocalBound(loopNestOp.getLoopUpperBounds()) ||
-      hasTaskloopLocalBound(loopNestOp.getLoopSteps())) {
-    return emitOpError() << "expects loop bounds and steps to be defined "
-                            "outside of the taskloop.context region";
+  if (hasUnsupportedTaskloopLocalBound(loopNestOp.getLoopLowerBounds()) ||
+      hasUnsupportedTaskloopLocalBound(loopNestOp.getLoopUpperBounds()) ||
+      hasUnsupportedTaskloopLocalBound(loopNestOp.getLoopSteps())) {
+    return emitOpError()
+           << "expects loop bounds and steps to be defined outside of the "
+              "taskloop.context region or by pure, regionless operations "
+              "that do not depend on block arguments";
   }
 
   return success();
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 2e15f4de4545d..f7cda64e0f780 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h"
 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
@@ -2874,6 +2875,171 @@ convertOmpTaskloopWrapperOp(omp::TaskloopWrapperOp loopWrapperOp,
   return success();
 }
 
+/// Returns the LLVM value mapped to `value`, or, if it is unmapped, translates
+/// its defining op (and unmapped operands, recursively) with `builder` and then
+/// forgets the temporary mappings. Only pure, regionless ops are accepted since
+/// the same op is translated again when the taskloop body itself is lowered.
+static llvm::Expected<llvm::Value *>
+lookupOrTranslatePureValue(Value value,
+                           LLVM::ModuleTranslation &moduleTranslation,
+                           llvm::IRBuilderBase &builder) {
+  if (llvm::Value *mapped = moduleTranslation.lookupValue(value))
+    return mapped;
+
+  Operation *defOp = value.getDefiningOp();
+  if (!defOp)
+    return llvm::make_error<llvm::StringError>(
+        "value is a block argument and is not mapped",
+        llvm::inconvertibleErrorCode());
+  if (defOp->getNumRegions() != 0 || !isPure(defOp))
+    return llvm::make_error<llvm::StringError>(
+        "unsupported op defining taskloop loop bound",
+        llvm::inconvertibleErrorCode());
+
+  SmallVector<Value> mappingsToRemove;
+  mappingsToRemove.reserve(defOp->getNumOperands() + defOp->getNumResults());
+  for (Value operand : defOp->getOperands()) {
+    if (moduleTranslation.lookupValue(operand))
+      continue;
+
+    llvm::Expected<llvm::Value *> operandOrError =
+        lookupOrTranslatePureValue(operand, moduleTranslation, builder);
+    if (!operandOrError)
+      return operandOrError.takeError();
+    moduleTranslation.mapValue(operand, *operandOrError);
+    mappingsToRemove.push_back(operand);
+  }
+
+  if (failed(moduleTranslation.convertOperation(*defOp, builder)))
+    return llvm::make_error<llvm::StringError>(
+        "failed to convert op defining taskloop loop bound",
+        llvm::inconvertibleErrorCode());
+
+  llvm::Value *result = moduleTranslation.lookupValue(value);
+  assert(result && "expected conversion of loop bound op to produce a value");
+
+  for (Value resultValue : defOp->getResults()) {
+    if (moduleTranslation.lookupValue(resultValue))
+      mappingsToRemove.push_back(resultValue);
+  }
+  for (Value mappedValue : mappingsToRemove)
+    moduleTranslation.forgetMapping(mappedValue);
+
+  return result;
+}
+
+static llvm::Error
+computeTaskloopBounds(omp::LoopNestOp loopOp, llvm::IRBuilderBase &builder,
+                      LLVM::ModuleTranslation &moduleTranslation,
+                      llvm::Value *&lbVal, llvm::Value *&ubVal,
+                      llvm::Value *&stepVal) {
+  Operation::operand_range lowerBounds = loopOp.getLoopLowerBounds();
+  Operation::operand_range upperBounds = loopOp.getLoopUpperBounds();
+  Operation::operand_range steps = loopOp.getLoopSteps();
+
+  llvm::Expected<llvm::Value *> firstLbOrErr =
+      lookupOrTranslatePureValue(lowerBounds[0], moduleTranslation, builder);
+  if (!firstLbOrErr)
+    return firstLbOrErr.takeError();
+
+  llvm::Type *boundType = (*firstLbOrErr)->getType();
+  ubVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
+  if (loopOp.getCollapseNumLoops() > 1) {
+    // In cases where Collapse is used with Taskloop, the upper bound of the
+    // iteration space needs to be recalculated to cater for the collapsed loop.
+    // The collapsed loop's upper bound is the product of the trip counts of
+    // all collapsed loops.
+    // The LowerBound for collapsed loops is always 1. When the loops are
+    // collapsed, it will reset the bounds and introduce processing to ensure
+    // the indices are presented as expected. As this happens after creating
+    // Taskloop, these bounds need predicting. Example:
+    // !$omp taskloop collapse(2)
+    //   do i = 1, 10
+    //     do j = 1, 5
+    //       ..
+    //     end do
+    //   end do
+    // This loop above has a total of 50 iterations, so the lb will be 1, and
+    // the ub will be 50. collapseLoops in OMPIRBuilder then handles ensuring
+    // that i and j are properly presented when used in the loop.
+    for (uint64_t i = 0; i < loopOp.getCollapseNumLoops(); i++) {
+      llvm::Expected<llvm::Value *> lbOrErr =
+          i == 0 ? std::move(firstLbOrErr)
+                 : lookupOrTranslatePureValue(lowerBounds[i], moduleTranslation,
+                                              builder);
+      if (!lbOrErr)
+        return lbOrErr.takeError();
+      llvm::Expected<llvm::Value *> ubOrErr = lookupOrTranslatePureValue(
+          upperBounds[i], moduleTranslation, builder);
+      if (!ubOrErr)
+        return ubOrErr.takeError();
+      llvm::Expected<llvm::Value *> stepOrErr =
+          lookupOrTranslatePureValue(steps[i], moduleTranslation, builder);
+      if (!stepOrErr)
+        return stepOrErr.takeError();
+
+      llvm::Value *loopLb = *lbOrErr;
+      llvm::Value *loopUb = *ubOrErr;
+      llvm::Value *loopStep = *stepOrErr;
+      // In some cases, such as where the ub is less than the lb so the loop
+      // steps down, the calculation for the loopTripCount is swapped. To ensure
+      // the correct value is found, calculate both UB - LB and LB - UB then
+      // select which value to use depending on how the loop has been
+      // configured.
+      llvm::Value *loopLbMinusOne = builder.CreateSub(
+          loopLb, builder.getIntN(boundType->getIntegerBitWidth(), 1));
+      llvm::Value *loopUbMinusOne = builder.CreateSub(
+          loopUb, builder.getIntN(boundType->getIntegerBitWidth(), 1));
+      llvm::Value *boundsCmp = builder.CreateICmpSLT(loopLb, loopUb);
+      llvm::Value *ubMinusLb = builder.CreateSub(loopUb, loopLbMinusOne);
+      llvm::Value *lbMinusUb = builder.CreateSub(loopLb, loopUbMinusOne);
+      llvm::Value *loopTripCount =
+          builder.CreateSelect(boundsCmp, ubMinusLb, lbMinusUb);
+      loopTripCount = builder.CreateBinaryIntrinsic(
+          llvm::Intrinsic::abs, loopTripCount, builder.getFalse());
+      // For loops that have a step value not equal to 1, we need to adjust the
+      // trip count to ensure the correct number of iterations for the loop is
+      // captured.
+      llvm::Value *loopTripCountDivStep =
+          builder.CreateSDiv(loopTripCount, loopStep);
+      loopTripCountDivStep = builder.CreateBinaryIntrinsic(
+          llvm::Intrinsic::abs, loopTripCountDivStep, builder.getFalse());
+      llvm::Value *loopTripCountRem =
+          builder.CreateSRem(loopTripCount, loopStep);
+      loopTripCountRem = builder.CreateBinaryIntrinsic(
+          llvm::Intrinsic::abs, loopTripCountRem, builder.getFalse());
+      llvm::Value *needsRoundUp = builder.CreateICmpNE(
+          loopTripCountRem,
+          builder.getIntN(loopTripCountRem->getType()->getIntegerBitWidth(),
+                          0));
+      loopTripCount =
+          builder.CreateAdd(loopTripCountDivStep,
+                            builder.CreateZExtOrTrunc(
+                                needsRoundUp, loopTripCountDivStep->getType()));
+      ubVal = builder.CreateMul(ubVal, loopTripCount);
+    }
+    lbVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
+    stepVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
+  } else {
+    llvm::Expected<llvm::Value *> ubOrErr =
+        lookupOrTranslatePureValue(upperBounds[0], moduleTranslation, builder);
+    if (!ubOrErr)
+      return ubOrErr.takeError();
+    llvm::Expected<llvm::Value *> stepOrErr =
+        lookupOrTranslatePureValue(steps[0], moduleTranslation, builder);
+    if (!stepOrErr)
+      return stepOrErr.takeError();
+    lbVal = *firstLbOrErr;
+    ubVal = *ubOrErr;
+    stepVal = *stepOrErr;
+  }
+
+  assert(lbVal != nullptr && "Expected value for lbVal");
+  assert(ubVal != nullptr && "Expected value for ubVal");
+  assert(stepVal != nullptr && "Expected value for stepVal");
+  return llvm::Error::success();
+}
+
 // Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
@@ -2963,6 +3129,14 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
   // Set up inserttion point for call to createTaskloop()
   builder.SetInsertPoint(taskloopStartBlock);
 
+  auto loopOp = cast<omp::LoopNestOp>(loopWrapperOp.getWrappedLoop());
+  llvm::Value *lbVal = nullptr;
+  llvm::Value *ubVal = nullptr;
+  llvm::Value *stepVal = nullptr;
+  if (llvm::Error err = computeTaskloopBounds(
+          loopOp, builder, moduleTranslation, lbVal, ubVal, stepVal))
+    return handleError(std::move(err), opInst);
+
   auto bodyCB = [&](InsertPointTy allocaIP,
                     InsertPointTy codegenIP) -> llvm::Error {
     // Save the alloca insertion point on ModuleTranslation stack for use in
@@ -3129,91 +3303,11 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
     return builder.saveIP();
   };
 
-  auto loopOp = cast<omp::LoopNestOp>(loopWrapperOp.getWrappedLoop());
-
   auto loopInfo = [&]() -> llvm::Expected<llvm::CanonicalLoopInfo *> {
     llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
     return loopInfo;
   };
 
-  Operation::operand_range lowerBounds = loopOp.getLoopLowerBounds();
-  Operation::operand_range upperBounds = loopOp.getLoopUpperBounds();
-  Operation::operand_range steps = loopOp.getLoopSteps();
-  llvm::Type *boundType =
-      moduleTranslation.lookupValue(lowerBounds[0])->getType();
-  llvm::Value *lbVal = nullptr;
-  llvm::Value *ubVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
-  llvm::Value *stepVal = nullptr;
-  if (loopOp.getCollapseNumLoops() > 1) {
-    // In cases where Collapse is used with Taskloop, the upper bound of the
-    // iteration space needs to be recalculated to cater for the collapsed loop.
-    // The Collapsed Loop UpperBound is the product of all collapsed
-    // loop's tripcount.
-    // The LowerBound for collapsed loops is always 1. When the loops are
-    // collapsed, it will reset the bounds and introduce processing to ensure
-    // the index's are presented as expected. As this happens after creating
-    // Taskloop, these bounds need predicting. Example:
-    // !$omp taskloop collapse(2)
-    //   do i = 1, 10
-    //     do j = 1, 5
-    //       ..
-    //     end do
-    //   end do
-    // This loop above has a total of 50 iterations, so the lb will be 1, and
-    // the ub will be 50. collapseLoops in OMPIRBuilder then handles ensuring
-    // that i and j are properly presented when used in the loop.
-    for (uint64_t i = 0; i < loopOp.getCollapseNumLoops(); i++) {
-      llvm::Value *loopLb = moduleTranslation.lookupValue(lowerBounds[i]);
-      llvm::Value *loopUb = moduleTranslation.lookupValue(upperBounds[i]);
-      llvm::Value *loopStep = moduleTranslation.lookupValue(steps[i]);
-      // In some cases, such as where the ub is less than the lb so the loop
-      // steps down, the calculation for the loopTripCount is swapped. To ensure
-      // the correct value is found, calculate both UB - LB and LB - UB then
-      // select which value to use depending on how the loop has been
-      // configured.
-      llvm::Value *loopLbMinusOne = builder.CreateSub(
-          loopLb, builder.getIntN(boundType->getIntegerBitWidth(), 1));
-      llvm::Value *loopUbMinusOne = builder.CreateSub(
-          loopUb, builder.getIntN(boundType->getIntegerBitWidth(), 1));
-      llvm::Value *boundsCmp = builder.CreateICmpSLT(loopLb, loopUb);
-      llvm::Value *ubMinusLb = builder.CreateSub(loopUb, loopLbMinusOne);
-      llvm::Value *lbMinusUb = builder.CreateSub(loopLb, loopUbMinusOne);
-      llvm::Value *loopTripCount =
-          builder.CreateSelect(boundsCmp, ubMinusLb, lbMinusUb);
-      loopTripCount = builder.CreateBinaryIntrinsic(
-          llvm::Intrinsic::abs, loopTripCount, builder.getFalse());
-      // For loops that have a step value not equal to 1, we need to adjust the
-      // trip count to ensure the correct number of iterations for the loop is
-      // captured.
-      llvm::Value *loopTripCountDivStep =
-          builder.CreateSDiv(loopTripCount, loopStep);
-      loopTripCountDivStep = builder.CreateBinaryIntrinsic(
-          llvm::Intrinsic::abs, loopTripCountDivStep, builder.getFalse());
-      llvm::Value *loopTripCountRem =
-          builder.CreateSRem(loopTripCount, loopStep);
-      loopTripCountRem = builder.CreateBinaryIntrinsic(
-          llvm::Intrinsic::abs, loopTripCountRem, builder.getFalse());
-      llvm::Value *needsRoundUp = builder.CreateICmpNE(
-          loopTripCountRem,
-          builder.getIntN(loopTripCountRem->getType()->getIntegerBitWidth(),
-                          0));
-      loopTripCount =
-          builder.CreateAdd(loopTripCountDivStep,
-                            builder.CreateZExtOrTrunc(
-                                needsRoundUp, loopTripCountDivStep->getType()));
-      ubVal = builder.CreateMul(ubVal, loopTripCount);
-    }
-    lbVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
-    stepVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
-  } else {
-    lbVal = moduleTranslation.lookupValue(lowerBounds[0]);
-    ubVal = moduleTranslation.lookupValue(upperBounds[0]);
-    stepVal = moduleTranslation.lookupValue(steps[0]);
-  }
-  assert(lbVal != nullptr && "Expected value for lbVal");
-  assert(ubVal != nullptr && "Expected value for ubVal");
-  assert(stepVal != nullptr && "Expected value for stepVal");
-
   llvm::Value *ifCond = nullptr;
   llvm::Value *grainsize = nullptr;
   int sched = 0; // default
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index cf398f151ed0b..7818c2f9e6fc9 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -990,9 +990,8 @@ llvm::CallInst *mlir::LLVM::detail::createIntrinsicCall(
 
 /// Given a single MLIR operation, create the corresponding LLVM IR operation
 /// using the `builder`.
-LogicalResult ModuleTranslation::convertOperation(Operation &op,
-                                                  llvm::IRBuilderBase &builder,
-                                                  bool recordInsertions) {
+LogicalResult ModuleTranslation::convertOperationImpl(
+    Operation &op, llvm::IRBuilderBase &builder, bool recordInsertions) {
   const LLVMTranslationDialectInterface *opIface = iface.getInterfaceFor(&op);
   if (!opIface)
     return op.emitError("cannot be converted to LLVM IR: missing "
@@ -1052,7 +1051,7 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
     builder.SetCurrentDebugLocation(
         debugTranslation->translateLoc(op.getLoc(), subprogram));
 
-    if (failed(convertOperation(op, builder, recordInsertions)))
+    if (failed(convertOperationImpl(op, builder, recordInsertions)))
       return failure();
 
     // Set the branch weight metadata on the translated instruction.
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 98087cc43588f..2a7a7827ca6fa 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -3035,11 +3035,17 @@ func.func @omp_taskloop_invalid_composite(%lb: index, %ub: index, %step: index)
   return
 }
 
+omp.private {type = private} @taskloop.bound.privatizer : index init {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  omp.yield(%arg0 : !llvm.ptr)
+}
+
 // -----
-func.func @omp_taskloop_local_loop_bounds() {
-  // expected-error @below {{'omp.taskloop.context' op expects loop bounds and steps to be defined outside of the taskloop.context region}}
-  omp.taskloop.context {
-    %lb = arith.constant 1 : index
+func.func @omp_taskloop_local_loop_bounds_from_block_arg(%arg0: index) {
+  %c1 = arith.constant 1 : index
+  // expected-error @below {{'omp.taskloop.context' op expects loop bounds and steps to be defined outside of the taskloop.context region or by pure, regionless operations that do not depend on block arguments}}
+  omp.taskloop.context private(@taskloop.bound.privatizer %arg0 -> %arg1 : index) {
+    %lb = arith.addi %arg1, %c1 : index
     %ub = arith.constant 10 : index
     %step = arith.constant 1 : index
     omp.taskloop.wrapper {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 869f163cb4014..90db9187a56bf 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2861,6 +2861,25 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: %[[LB:.+]] = arith.constant 1 : i32
+    %local_lb = arith.constant 1 : i32
+    // CHECK: %[[UB:.+]] = arith.constant 10 : i32
+    %local_ub = arith.constant 10 : i32
+    // CHECK: %[[STEP:.+]] = arith.constant 1 : i32
+    %local_step = arith.constant 1 : i32
+    // CHECK: omp.taskloop.wrapper {
+    omp.taskloop.wrapper {
+      // CHECK: omp.loop_nest (%{{.+}}) : i32 = (%[[LB]]) to (%[[UB]]) step (%[[STEP]]) {
+      omp.loop_nest (%i) : i32 = (%local_lb) to (%local_ub) step (%local_step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
+    }
+    omp.terminator
+  }
+
   // CHECK: return
   return
 }
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-local-bounds.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-local-bounds.mlir
new file mode 100644
index 0000000000000..8868423b9b194
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-local-bounds.mlir
@@ -0,0 +1,65 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Taskloop loop bounds defined inside omp.taskloop.context by pure operations
+// that do not depend on block arguments should be materialized in the parent
+// function before the runtime call and translated again inside the outlined
+// task.
+
+omp.private {type = private} @_QPtest_taskloop_local_bounds_private_i32 : i32
+
+// CHECK-LABEL: define void @_QPtest_taskloop_local_constants(
+llvm.func @_QPtest_taskloop_local_constants() {
+  %one_i64 = llvm.mlir.constant(1 : i64) : i64
+  %i = llvm.alloca %one_i64 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  omp.taskloop.context private(@_QPtest_taskloop_local_bounds_private_i32 %i -> %arg0 : !llvm.ptr) {
+    %lb = llvm.mlir.constant(1 : i32) : i32
+    %ub = llvm.mlir.constant(10 : i32) : i32
+    %step = llvm.mlir.constant(1 : i32) : i32
+    omp.taskloop.wrapper {
+      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+        llvm.store %iv, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK:         %[[GEP_LB:.*]] = getelementptr
+// CHECK:         store i64 1, ptr %[[GEP_LB]]
+// CHECK:         %[[GEP_UB:.*]] = getelementptr
+// CHECK:         store i64 10, ptr %[[GEP_UB]]
+// CHECK:         %[[GEP_STEP:.*]] = getelementptr
+// CHECK:         store i64 1, ptr %[[GEP_STEP]]
+// CHECK:         call void @__kmpc_taskloop(
+
+// CHECK-LABEL: define internal void @_QPtest_taskloop_local_constants..omp_par(
+// CHECK:         %[[OL_GEP_LB:.*]] = getelementptr
+// CHECK:         %[[OL_LOAD_LB:.*]] = load i64, ptr %[[OL_GEP_LB]]
+// CHECK:         %[[OL_GEP_UB:.*]] = getelementptr
+// CHECK:         %[[OL_LOAD_UB:.*]] = load i64, ptr %[[OL_GEP_UB]]
+
+// CHECK-LABEL: define void @_QPtest_taskloop_local_derived_bound(
+llvm.func @_QPtest_taskloop_local_derived_bound() {
+  %one_i64 = llvm.mlir.constant(1 : i64) : i64
+  %i = llvm.alloca %one_i64 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  omp.taskloop.context private(@_QPtest_taskloop_local_bounds_private_i32 %i -> %arg0 : !llvm.ptr) {
+    %lb = llvm.mlir.constant(1 : i32) : i32
+    %ten = llvm.mlir.constant(10 : i32) : i32
+    %two = llvm.mlir.constant(2 : i32) : i32
+    %ub = llvm.add %ten, %two : i32
+    %step = llvm.mlir.constant(1 : i32) : i32
+    omp.taskloop.wrapper {
+      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+        llvm.store %iv, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK:         store i64 12, ptr %{{.*}}
+// CHECK:         call void @__kmpc_taskloop(

>From d20b1b650b4cdde7fc5e1ed8befff2a1124f9f3a Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 13 Apr 2026 14:21:02 +0100
Subject: [PATCH 2/2] Add return type to lambda

---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 20328375e8e9f..691793b50d33d 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3451,7 +3451,7 @@ LogicalResult TaskloopContextOp::verifyRegions() {
 
     return llvm::all_of(defOp->getOperands(), isValidBoundValue);
   };
-  auto hasUnsupportedTaskloopLocalBound = [&](OperandRange range) {
+  auto hasUnsupportedTaskloopLocalBound = [&](OperandRange range) -> bool {
     return llvm::any_of(range,
                         [&](Value value) { return !isValidBoundValue(value); });
   };



More information about the Mlir-commits mailing list