[llvm-branch-commits] [llvm] [mlir] [mlir][llvmir][OpenMP] Translate affinity clause in task construct to llvmir (PR #182223)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Mar 5 12:48:51 PST 2026


https://github.com/chichunchen updated https://github.com/llvm/llvm-project/pull/182223

>From b67c5654405ec0998e460b16e29d9716616f753b Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 15:20:40 -0600
Subject: [PATCH 1/6] [mlir][llvmir][OpenMP] Translate affinity clause in task
 construct to llvmir

Translate affinity entries to LLVM IR by passing the affinity information to
createTask (the call to __kmpc_omp_reg_task_with_affinity is emitted inside
PostOutlineCB).
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 18 ++--
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 22 ++++-
 .../Frontend/OpenMPIRBuilderTest.cpp          | 92 +++++++++++++++++++
 .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp  |  2 +
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 72 ++++++++++++---
 mlir/test/Target/LLVMIR/openmp-todo.mlir      | 12 ---
 6 files changed, 184 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 9885ffc8b2065..deb2535ac843f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1510,6 +1510,12 @@ class OpenMPIRBuilder {
         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
   };
 
+  /// A struct to pack the relevant information for an OpenMP affinity clause.
+  struct AffinityData {
+    Value *Count; // number of kmp_task_affinity_info_t entries
+    Value *Info;  // kmp_task_affinity_info_t
+  };
+
   /// Generator for `#omp taskloop`
   ///
   /// \param Loc The location where the taskloop construct was encountered.
@@ -1573,12 +1579,12 @@ class OpenMPIRBuilder {
   /// \param Mergeable	 If the given task is `mergeable`
   /// \param priority `priority-value' specifies the execution order of the
   ///                 tasks that is generated by the construct
-  LLVM_ABI InsertPointOrErrorTy
-  createTask(const LocationDescription &Loc, InsertPointTy AllocaIP,
-             BodyGenCallbackTy BodyGenCB, bool Tied = true,
-             Value *Final = nullptr, Value *IfCondition = nullptr,
-             SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
-             Value *EventHandle = nullptr, Value *Priority = nullptr);
+  LLVM_ABI InsertPointOrErrorTy createTask(
+      const LocationDescription &Loc, InsertPointTy AllocaIP,
+      BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr,
+      Value *IfCondition = nullptr, SmallVector<DependData> Dependencies = {},
+      AffinityData Affinity = {}, bool Mergeable = false,
+      Value *EventHandle = nullptr, Value *Priority = nullptr);
 
   /// Generator for the taskgroup construct
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 1d522e9b14c7a..95e7a86e01808 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2434,8 +2434,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
-    SmallVector<DependData> Dependencies, bool Mergeable, Value *EventHandle,
-    Value *Priority) {
+    SmallVector<DependData> Dependencies, AffinityData Affinity, bool Mergeable,
+    Value *EventHandle, Value *Priority) {
 
   if (!updateToLocation(Loc))
     return InsertPointTy();
@@ -2481,7 +2481,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
       Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
 
   OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
-                      Mergeable, Priority, EventHandle, TaskAllocaBB,
+                      Affinity, Mergeable, Priority, EventHandle, TaskAllocaBB,
                       ToBeDeleted](Function &OutlinedFn) mutable {
     // Replace the Stale CI by appropriate RTL function call.
     assert(OutlinedFn.hasOneUse() &&
@@ -2555,6 +2555,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
                       /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
                       /*task_func=*/&OutlinedFn});
 
+    if (Affinity.Count && Affinity.Info) {
+      Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
+          OMPRTL___kmpc_omp_reg_task_with_affinity);
+
+      // bitcast to i8*
+      Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          Affinity.Info, Builder.getPtrTy(0));
+
+      if (!Affinity.Count->getType()->isIntegerTy(32))
+        Affinity.Count =
+            Builder.CreateTruncOrBitCast(Affinity.Count, Builder.getInt32Ty());
+
+      createRuntimeFunctionCall(
+          RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+    }
+
     // Emit detach clause initialization.
     // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
     // task_descriptor);
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 200dda84b13f5..8292b055f6cd9 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -7555,6 +7555,98 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
   EXPECT_EQ(OulinedFnCall->getNextNode(), TaskCompleteCall);
 }
 
+TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
+
+  LLVMContext &Ctx = M->getContext();
+  StructType *AffInfoTy = StructType::get(
+      Type::getInt64Ty(Ctx), Type::getInt64Ty(Ctx), Type::getInt32Ty(Ctx));
+
+  // Create [1 x AffInfoTy] as alloca (element alloca is fine too).
+  Value *CountI32 = Builder.getInt32(1);
+  AllocaInst *AffArr =
+      Builder.CreateAlloca(AffInfoTy, Builder.getInt64(1), "omp.affinity_list");
+
+  // Fill entry 0 minimally so the pointer definitely dominates use.
+  Value *Entry0 = Builder.CreateInBoundsGEP(
+      AffInfoTy, AffArr, Builder.getInt64(0), "omp.affinity.entry");
+  Builder.CreateStore(Builder.getInt64(0),
+                      Builder.CreateStructGEP(AffInfoTy, Entry0, 0));
+  Builder.CreateStore(Builder.getInt64(64),
+                      Builder.CreateStructGEP(AffInfoTy, Entry0, 1));
+  Builder.CreateStore(Builder.getInt32(0),
+                      Builder.CreateStructGEP(AffInfoTy, Entry0, 2));
+
+  SmallVector<OpenMPIRBuilder::AffinityData> Affinities;
+  OpenMPIRBuilder::AffinityData Affinity{CountI32, AffArr};
+  Affinities.push_back(Affinity);
+
+  BasicBlock *AllocaBB = Builder.GetInsertBlock();
+  BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
+  OpenMPIRBuilder::LocationDescription Loc(
+      InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
+
+  ASSERT_EXPECTED_INIT(
+      OpenMPIRBuilder::InsertPointTy, AfterIP,
+      OMPBuilder.createTask(
+          Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
+          BodyGenCB,
+          /*Tied=*/true,
+          /*Final=*/nullptr,
+          /*IfCondition=*/nullptr,
+          /*Dependencies=*/{},
+          /*Affinity=*/Affinities,
+          /*Mergeable=*/false,
+          /*EventHandle=*/nullptr,
+          /*Priority=*/nullptr));
+
+  Builder.restoreIP(AfterIP);
+  OMPBuilder.finalize();
+  Builder.CreateRetVoid();
+
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  Function *TaskAllocFn =
+      OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+  Function *RegAffFn = OMPBuilder.getOrCreateRuntimeFunctionPtr(
+      OMPRTL___kmpc_omp_reg_task_with_affinity);
+
+  CallInst *TaskAllocCI = nullptr;
+  CallInst *RegAffCI = nullptr;
+
+  for (auto &I : instructions(F)) {
+    if (auto *CI = dyn_cast<CallInst>(&I)) {
+      if (CI->getCalledFunction() == TaskAllocFn)
+        TaskAllocCI = CI;
+      if (CI->getCalledFunction() == RegAffFn)
+        RegAffCI = CI;
+    }
+  }
+
+  ASSERT_NE(TaskAllocCI, nullptr) << "expected __kmpc_omp_task_alloc call";
+  ASSERT_NE(RegAffCI, nullptr)
+      << "expected __kmpc_omp_reg_task_with_affinity call";
+
+  // Check reg_task_with_affinity signature:
+  //   i32 __kmpc_omp_reg_task_with_affinity(ident_t*, i32 gtid,
+  //                                         kmp_task_t*, i32 naffins,
+  //                                         kmp_task_affinity_info_t*)
+  ASSERT_EQ(RegAffCI->arg_size(), 5u);
+  // naffins
+  EXPECT_TRUE(RegAffCI->getArgOperand(3)->getType()->isIntegerTy(32));
+  // kmp_task_affinity_info_t*
+  EXPECT_TRUE(RegAffCI->getArgOperand(4)->getType()->isPointerTy());
+}
+
 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 7fdc23adc8573..e6242e5f06418 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -154,6 +154,8 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
   // discarded on lowering to LLVM-IR from the OpenMP dialect.
   converter.addConversion(
       [&](omp::MapBoundsType type) -> Type { return type; });
+  converter.addConversion(
+      [&](omp::AffinityEntryType type) -> Type { return type; });
 
   // Add conversions for all OpenMP operations.
   addOpenMPOpConversions<
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 571575762d54a..7f3d78c563308 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -321,10 +321,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
                           << " operation";
   };
 
-  auto checkAffinity = [&todo](auto op, LogicalResult &result) {
-    if (!op.getAffinityVars().empty())
-      result = todo("affinity");
-  };
   auto checkAllocate = [&todo](auto op, LogicalResult &result) {
     if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
       result = todo("allocate");
@@ -408,7 +404,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkThreadLimit(op, result);
       })
       .Case([&](omp::TaskOp op) {
-        checkAffinity(op, result);
         checkAllocate(op, result);
         checkInReduction(op, result);
       })
@@ -2303,6 +2298,53 @@ void TaskContextStructManager::freeStructPtr() {
   builder.CreateFree(structPtr);
 }
 
+static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
+                                  llvm::IRBuilderBase &builder,
+                                  LLVM::ModuleTranslation &moduleTranslation,
+                                  llvm::OpenMPIRBuilder::AffinityData &ad) {
+  auto &ctx = builder.getContext();
+  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+      llvm::Type::getInt32Ty(ctx));
+
+  SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
+                                        taskOp.getAffinityVars().end());
+
+  // Allocate [N x kmp_task_affinity_info_t]
+  llvm::Value *count =
+      llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
+  llvm::AllocaInst *affinityList =
+      builder.CreateAlloca(kmpTaskAffinityInfoTy, count, "omp.affinity_list");
+
+  for (unsigned i = 0; i < affinityVars.size(); ++i) {
+    auto entryOp = affinityVars[i].getDefiningOp<mlir::omp::AffinityEntryOp>();
+    assert(entryOp && "affinity item must be omp.affinity_entry");
+
+    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+    assert(addr && "expect affinity addr to be non-null");
+    llvm::Value *baseAddr = builder.CreatePtrToInt(addr, builder.getInt64Ty());
+    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+    llvm::Value *flags = builder.getInt32(0);
+
+    llvm::Value *entry =
+        builder.CreateInBoundsGEP(kmpTaskAffinityInfoTy, affinityList,
+                                  builder.getInt64(i), "omp.affinity.entry");
+
+    llvm::Value *gep0 =
+        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0); // base_addr
+    llvm::Value *gep1 =
+        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1); // len
+    llvm::Value *gep2 =
+        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2); // flags (i32)
+
+    builder.CreateStore(baseAddr, gep0);
+    builder.CreateStore(len, gep1);
+    builder.CreateStore(flags, gep2);
+  }
+  ad.Info = affinityList;
+  ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
+}
+
 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2515,12 +2557,16 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
                   moduleTranslation, dds);
 
+  llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
+  if (!taskOp.getAffinityVars().empty())
+    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createTask(
           ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
           moduleTranslation.lookupValue(taskOp.getFinal()),
-          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
+          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad,
           taskOp.getMergeable(),
           moduleTranslation.lookupValue(taskOp.getEventHandle()),
           moduleTranslation.lookupValue(taskOp.getPriority()));
@@ -7301,13 +7347,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
           .Case([&](omp::LoopNestOp) {
             return convertOmpLoopNest(*op, builder, moduleTranslation);
           })
-          .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
-              [&](auto op) {
-                // No-op, should be handled by relevant owning operations e.g.
-                // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
-                // etc. and then discarded
-                return success();
-              })
+          .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
+                omp::AffinityEntryOp>([&](auto op) {
+            // No-op, should be handled by relevant owning operations e.g.
+            // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
+            // etc. and then discarded
+            return success();
+          })
           .Case([&](omp::NewCliOp op) {
             // Meta-operation: Doesn't do anything by itself, but used to
             // identify a loop.
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 2500d546fcf4d..8fb66cb4dd0eb 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -462,15 +462,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
   }
   llvm.return
 }
-
-// -----
-llvm.func @task_affinity(%ptr : !llvm.ptr, %len : i64) {
-  // expected-error at below {{not yet implemented: omp.affinity_entry}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.affinity_entry}}
-  %ae = omp.affinity_entry %ptr, %len
-    : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
-  omp.task affinity(%ae : !omp.affinity_entry_ty<!llvm.ptr, i64>) {
-    omp.terminator
-  }
-  llvm.return
-}

>From 166492a004dbd85b88c5be7e44c25e45040e0d94 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 18:25:34 -0600
Subject: [PATCH 2/6] Implement lowering for omp.iterator in affinity

Create IteratorLoopNestScope to build the nested loops for an iterator.
Take advantage of RAII so that each level of the loop nest gets a
correct exit.
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  27 +++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  82 ++++++++
 .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp  |   1 +
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 180 +++++++++++++++---
 4 files changed, 268 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index deb2535ac843f..0332930eafedf 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4293,6 +4293,33 @@ class ScanInfo {
   ~ScanInfo() { delete (ScanBuffPtrs); }
 };
 
+class IteratorLoopNestScope {
+private:
+  llvm::IRBuilderBase &B;
+  llvm::LLVMContext &Ctx;
+
+  unsigned Dims = 0;
+  llvm::ArrayRef<llvm::Value *> LowerBounds;
+  llvm::ArrayRef<llvm::Value *> UpperBounds;
+  llvm::ArrayRef<llvm::Value *> Steps;
+
+  llvm::SmallVector<llvm::PHINode *> IVs;
+  llvm::SmallVector<llvm::BasicBlock *> HdrBBs;
+  llvm::SmallVector<llvm::BasicBlock *> BodyBBs;
+  llvm::SmallVector<llvm::BasicBlock *> LatchBBs;
+  llvm::SmallVector<llvm::BasicBlock *> ExitBBs;
+
+public:
+  IteratorLoopNestScope(llvm::IRBuilderBase &Builder, unsigned Dims,
+                        llvm::ArrayRef<llvm::Value *> LowerBounds,
+                        llvm::ArrayRef<llvm::Value *> UpperBounds,
+                        llvm::ArrayRef<llvm::Value *> Steps);
+
+  ~IteratorLoopNestScope();
+
+  llvm::ArrayRef<llvm::PHINode *> getIVs() const { return IVs; }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 95e7a86e01808..19f97d848b548 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11860,3 +11860,85 @@ void CanonicalLoopInfo::invalidate() {
   Latch = nullptr;
   Exit = nullptr;
 }
+
+IteratorLoopNestScope::IteratorLoopNestScope(
+    llvm::IRBuilderBase &Builder, unsigned Dims,
+    llvm::ArrayRef<llvm::Value *> LowerBounds,
+    llvm::ArrayRef<llvm::Value *> UpperBounds,
+    llvm::ArrayRef<llvm::Value *> Steps)
+    : B(Builder), Ctx(Builder.getContext()), Dims(Dims),
+      LowerBounds(LowerBounds), UpperBounds(UpperBounds), Steps(Steps) {
+  Function *F = B.GetInsertBlock()->getParent();
+  IVs.assign(Dims, nullptr);
+  HdrBBs.resize(Dims);
+  BodyBBs.resize(Dims);
+  LatchBBs.resize(Dims);
+  ExitBBs.resize(Dims);
+
+  llvm::BasicBlock *PreBB = B.GetInsertBlock();
+
+  for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
+    HdrBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.hdr", F);
+    BodyBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.body", F);
+    LatchBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.latch", F);
+    ExitBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.exit", F);
+  }
+
+  // Enter outermost loop.
+  B.CreateBr(HdrBBs[0]);
+
+  for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
+    // header
+    B.SetInsertPoint(HdrBBs[Dimension]);
+    IVs[Dimension] = B.CreatePHI(B.getInt64Ty(), 2, "omp.it.iv");
+    IVs[Dimension]->addIncoming(LowerBounds[Dimension],
+                                (Dimension == 0) ? PreBB
+                                                 : BodyBBs[Dimension - 1]);
+
+    llvm::Value *Cond =
+        B.CreateICmpULE(IVs[Dimension], UpperBounds[Dimension], "omp.it.cmp");
+    B.CreateCondBr(Cond, BodyBBs[Dimension], ExitBBs[Dimension]);
+
+    // body
+    B.SetInsertPoint(BodyBBs[Dimension]);
+    if (Dimension + 1 < Dims) {
+      B.CreateBr(HdrBBs[Dimension + 1]);
+    }
+
+    // iv = iv + step
+    B.SetInsertPoint(LatchBBs[Dimension]);
+    llvm::Value *Next =
+        B.CreateAdd(IVs[Dimension], Steps[Dimension], "omp.it.next");
+    IVs[Dimension]->addIncoming(Next, LatchBBs[Dimension]);
+    B.CreateBr(HdrBBs[Dimension]);
+  }
+
+  // Continue emitting the body of the innermost loop.
+  B.SetInsertPoint(BodyBBs[Dims - 1]);
+}
+
+IteratorLoopNestScope::~IteratorLoopNestScope() {
+  if (Dims == 0)
+    return;
+
+  const unsigned Last = Dims - 1;
+
+  // Fallthrough to latch if the innermost body isn't terminated by the user.
+  if (!BodyBBs[Last]->getTerminator()) {
+    llvm::IRBuilderBase::InsertPointGuard g(B);
+    B.SetInsertPoint(BodyBBs[Last]);
+    B.CreateBr(LatchBBs[Last]);
+  }
+
+  // Wire exits in reverse: exit(d) -> latch(d-1).
+  for (unsigned Dimension = Last; Dimension > 0; --Dimension) {
+    if (ExitBBs[Dimension]->getTerminator())
+      continue;
+    llvm::IRBuilderBase::InsertPointGuard g(B);
+    B.SetInsertPoint(ExitBBs[Dimension]);
+    B.CreateBr(LatchBBs[Dimension - 1]);
+  }
+
+  // Continue after the whole nest at outermost exit.
+  B.SetInsertPoint(ExitBBs[0]);
+}
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index e6242e5f06418..d90912f9f686f 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -156,6 +156,7 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
       [&](omp::MapBoundsType type) -> Type { return type; });
   converter.addConversion(
       [&](omp::AffinityEntryType type) -> Type { return type; });
+  converter.addConversion([&](omp::IteratedType type) -> Type { return type; });
 
   // Add conversions for all OpenMP operations.
   addOpenMPOpConversions<
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7f3d78c563308..515ec97b56849 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2224,6 +2224,71 @@ class TaskContextStructManager {
   /// The type of the structure
   llvm::Type *structTy = nullptr;
 };
+
+class IteratorInfo {
+private:
+  llvm::SmallVector<llvm::Value *> lowerBounds;
+  llvm::SmallVector<llvm::Value *> upperBounds;
+  llvm::SmallVector<llvm::Value *> steps;
+  llvm::SmallVector<llvm::Value *> trips;
+  unsigned dims;
+  llvm::Value *totalTrips;
+  const mlir::LLVM::ModuleTranslation &moduleTranslation;
+  llvm::IRBuilderBase &builder;
+
+  llvm::Value *lookUpAsI64(mlir::Value val) {
+    llvm::Value *v = moduleTranslation.lookupValue(val);
+    if (!v)
+      return nullptr;
+    if (v->getType()->isIntegerTy(64))
+      return v;
+    if (v->getType()->isIntegerTy())
+      return builder.CreateZExtOrTrunc(v, builder.getInt64Ty());
+    return nullptr;
+  }
+
+public:
+  IteratorInfo(mlir::omp::IteratorsOp itersOp,
+               mlir::LLVM::ModuleTranslation &moduleTranslation,
+               llvm::IRBuilderBase &builder)
+      : moduleTranslation(moduleTranslation), builder(builder) {
+    dims = itersOp.getLbs().size();
+    this->lowerBounds.resize(dims);
+    this->upperBounds.resize(dims);
+    this->steps.resize(dims);
+    this->trips.resize(dims);
+
+    for (unsigned d = 0; d < dims; ++d) {
+      llvm::Value *lb = lookUpAsI64(itersOp.getLbs()[d]);
+      llvm::Value *ub = lookUpAsI64(itersOp.getUbs()[d]);
+      llvm::Value *st = lookUpAsI64(itersOp.getSteps()[d]);
+      assert(lb && ub && st &&
+             "Expect lowerBounds, upperBounds, and steps in IteratorsOp");
+
+      this->lowerBounds[d] = lb;
+      this->upperBounds[d] = ub;
+      this->steps[d] = st;
+
+      // trips = ((ub - lb) / step) + 1  (inclusive ub, assume positive step)
+      llvm::Value *diff = builder.CreateSub(ub, lb);
+      llvm::Value *div = builder.CreateUDiv(diff, st);
+      this->trips[d] = builder.CreateAdd(
+          div, llvm::ConstantInt::get(builder.getInt64Ty(), 1));
+    }
+
+    this->totalTrips = llvm::ConstantInt::get(builder.getInt64Ty(), 1);
+    for (unsigned d = 0; d < dims; ++d)
+      this->totalTrips = builder.CreateMul(this->totalTrips, this->trips[d]);
+  }
+
+  unsigned getDims() const { return dims; }
+  llvm::ArrayRef<llvm::Value *> getLowerBounds() const { return lowerBounds; }
+  llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
+  llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
+  llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
+  llvm::Value *getTotalTrips() { return totalTrips; }
+};
+
 } // namespace
 
 void TaskContextStructManager::generateTaskContextStruct() {
@@ -2298,19 +2363,42 @@ void TaskContextStructManager::freeStructPtr() {
   builder.CreateFree(structPtr);
 }
 
+static void storeAffinityEntry(llvm::IRBuilderBase &builder,
+                               llvm::Value *affinityList, llvm::Value *index,
+                               llvm::Value *addr, llvm::Value *len) {
+  auto &ctx = builder.getContext();
+  // { base_addr (i64), len (i64), flags (i32) }
+  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+      llvm::Type::getInt32Ty(ctx));
+
+  llvm::Value *entry = builder.CreateInBoundsGEP(
+      kmpTaskAffinityInfoTy, affinityList, index, "omp.affinity.entry");
+
+  llvm::Value *baseAddrI64 = builder.CreatePtrToInt(addr, builder.getInt64Ty());
+  llvm::Value *flags = builder.getInt32(0);
+
+  builder.CreateStore(baseAddrI64,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0));
+  builder.CreateStore(len,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1));
+  builder.CreateStore(flags,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2));
+}
+
 static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
                                   llvm::IRBuilderBase &builder,
                                   LLVM::ModuleTranslation &moduleTranslation,
                                   llvm::OpenMPIRBuilder::AffinityData &ad) {
   auto &ctx = builder.getContext();
+  SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
+                                        taskOp.getAffinityVars().end());
+
+  // Define the type locally just for the Alloca
   llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
       llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
       llvm::Type::getInt32Ty(ctx));
 
-  SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
-                                        taskOp.getAffinityVars().end());
-
-  // Allocate [N x kmp_task_affinity_info_t]
   llvm::Value *count =
       llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
   llvm::AllocaInst *affinityList =
@@ -2322,29 +2410,70 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
 
     llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
     assert(addr && "expect affinity addr to be non-null");
-    llvm::Value *baseAddr = builder.CreatePtrToInt(addr, builder.getInt64Ty());
     llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
-    llvm::Value *flags = builder.getInt32(0);
-
-    llvm::Value *entry =
-        builder.CreateInBoundsGEP(kmpTaskAffinityInfoTy, affinityList,
-                                  builder.getInt64(i), "omp.affinity.entry");
-
-    llvm::Value *gep0 =
-        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0); // base_addr
-    llvm::Value *gep1 =
-        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1); // len
-    llvm::Value *gep2 =
-        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2); // flags (i32)
-
-    builder.CreateStore(baseAddr, gep0);
-    builder.CreateStore(len, gep1);
-    builder.CreateStore(flags, gep2);
+    storeAffinityEntry(builder, affinityList, builder.getInt64(i), addr, len);
   }
+
   ad.Info = affinityList;
   ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
 }
 
+static mlir::LogicalResult
+buildAffinityIterator(mlir::omp::IteratorsOp itersOp,
+                      llvm::IRBuilderBase &builder,
+                      mlir::LLVM::ModuleTranslation &moduleTranslation,
+                      llvm::OpenMPIRBuilder::AffinityData &A) {
+  auto &ctx = builder.getContext();
+  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+      llvm::Type::getInt32Ty(ctx));
+
+  mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
+  IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+
+  auto *list = builder.CreateAlloca(
+      kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
+
+  llvm::IteratorLoopNestScope iterLoops(
+      builder, iterInfo.getDims(), iterInfo.getLowerBounds(),
+      iterInfo.getUpperBounds(), iterInfo.getSteps());
+  auto indVars = iterLoops.getIVs();
+  for (unsigned d = 0; d < iterInfo.getDims(); ++d)
+    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), indVars[d]);
+
+  moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
+  if (mlir::failed(moduleTranslation.convertBlock(
+          iteratorRegionBlock, /*ignoreArguments=*/true, builder)))
+    return itersOp.emitOpError() << "failed to translate iterators region";
+
+  auto yield =
+      mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
+  auto entryOp =
+      yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
+
+  llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+  llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+
+  llvm::Value *linearIdx = llvm::ConstantInt::get(builder.getInt64Ty(), 0);
+  for (unsigned d = 0; d < iterInfo.getDims(); ++d) {
+    // Normalize the physical IV to a 0-based logical index for this dimension.
+    llvm::Value *logicalIdx = builder.CreateUDiv(
+        builder.CreateSub(indVars[d], iterInfo.getLowerBounds()[d]),
+        iterInfo.getSteps()[d]);
+    // Row-major flattening: linear = linear * Trips[d] + logicalIdx
+    linearIdx = builder.CreateAdd(
+        builder.CreateMul(linearIdx, iterInfo.getTrips()[d]), logicalIdx);
+  }
+
+  storeAffinityEntry(builder, list, linearIdx, addr, len);
+
+  moduleTranslation.forgetMapping(itersOp.getRegion());
+
+  A.Info = list;
+  A.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+  return mlir::success();
+}
+
 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2560,6 +2689,13 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
   if (!taskOp.getAffinityVars().empty())
     buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+  else if (!taskOp.getIterated().empty()) {
+    for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
+      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+      if (failed(buildAffinityIterator(iterOp, builder, moduleTranslation, ad)))
+        return failure();
+    }
+  }
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
@@ -7348,7 +7484,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
             return convertOmpLoopNest(*op, builder, moduleTranslation);
           })
           .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
-                omp::AffinityEntryOp>([&](auto op) {
+                omp::AffinityEntryOp, omp::IteratorsOp>([&](auto op) {
             // No-op, should be handled by relevant owning operations e.g.
             // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
             // etc. and then discarded

>From 4c17f40bac5576510570adfff07c0b8ca6ea6e36 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 15:20:55 -0600
Subject: [PATCH 3/6] Use createLoopSkeleton instead of manually building nested
 loop

Create a flattened one-dimensional canonical loop for omp.iterator
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  27 ----
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  82 ----------
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 142 +++++++++++-------
 3 files changed, 91 insertions(+), 160 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0332930eafedf..deb2535ac843f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4293,33 +4293,6 @@ class ScanInfo {
   ~ScanInfo() { delete (ScanBuffPtrs); }
 };
 
-class IteratorLoopNestScope {
-private:
-  llvm::IRBuilderBase &B;
-  llvm::LLVMContext &Ctx;
-
-  unsigned Dims = 0;
-  llvm::ArrayRef<llvm::Value *> LowerBounds;
-  llvm::ArrayRef<llvm::Value *> UpperBounds;
-  llvm::ArrayRef<llvm::Value *> Steps;
-
-  llvm::SmallVector<llvm::PHINode *> IVs;
-  llvm::SmallVector<llvm::BasicBlock *> HdrBBs;
-  llvm::SmallVector<llvm::BasicBlock *> BodyBBs;
-  llvm::SmallVector<llvm::BasicBlock *> LatchBBs;
-  llvm::SmallVector<llvm::BasicBlock *> ExitBBs;
-
-public:
-  IteratorLoopNestScope(llvm::IRBuilderBase &Builder, unsigned Dims,
-                        llvm::ArrayRef<llvm::Value *> LowerBounds,
-                        llvm::ArrayRef<llvm::Value *> UpperBounds,
-                        llvm::ArrayRef<llvm::Value *> Steps);
-
-  ~IteratorLoopNestScope();
-
-  llvm::ArrayRef<llvm::PHINode *> getIVs() const { return IVs; }
-};
-
 } // end namespace llvm
 
 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 19f97d848b548..95e7a86e01808 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11860,85 +11860,3 @@ void CanonicalLoopInfo::invalidate() {
   Latch = nullptr;
   Exit = nullptr;
 }
-
-IteratorLoopNestScope::IteratorLoopNestScope(
-    llvm::IRBuilderBase &Builder, unsigned Dims,
-    llvm::ArrayRef<llvm::Value *> LowerBounds,
-    llvm::ArrayRef<llvm::Value *> UpperBounds,
-    llvm::ArrayRef<llvm::Value *> Steps)
-    : B(Builder), Ctx(Builder.getContext()), Dims(Dims),
-      LowerBounds(LowerBounds), UpperBounds(UpperBounds), Steps(Steps) {
-  Function *F = B.GetInsertBlock()->getParent();
-  IVs.assign(Dims, nullptr);
-  HdrBBs.resize(Dims);
-  BodyBBs.resize(Dims);
-  LatchBBs.resize(Dims);
-  ExitBBs.resize(Dims);
-
-  llvm::BasicBlock *PreBB = B.GetInsertBlock();
-
-  for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
-    HdrBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.hdr", F);
-    BodyBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.body", F);
-    LatchBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.latch", F);
-    ExitBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.exit", F);
-  }
-
-  // Enter outermost loop.
-  B.CreateBr(HdrBBs[0]);
-
-  for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
-    // header
-    B.SetInsertPoint(HdrBBs[Dimension]);
-    IVs[Dimension] = B.CreatePHI(B.getInt64Ty(), 2, "omp.it.iv");
-    IVs[Dimension]->addIncoming(LowerBounds[Dimension],
-                                (Dimension == 0) ? PreBB
-                                                 : BodyBBs[Dimension - 1]);
-
-    llvm::Value *Cond =
-        B.CreateICmpULE(IVs[Dimension], UpperBounds[Dimension], "omp.it.cmp");
-    B.CreateCondBr(Cond, BodyBBs[Dimension], ExitBBs[Dimension]);
-
-    // body
-    B.SetInsertPoint(BodyBBs[Dimension]);
-    if (Dimension + 1 < Dims) {
-      B.CreateBr(HdrBBs[Dimension + 1]);
-    }
-
-    // iv = iv + step
-    B.SetInsertPoint(LatchBBs[Dimension]);
-    llvm::Value *Next =
-        B.CreateAdd(IVs[Dimension], Steps[Dimension], "omp.it.next");
-    IVs[Dimension]->addIncoming(Next, LatchBBs[Dimension]);
-    B.CreateBr(HdrBBs[Dimension]);
-  }
-
-  // Continue emitting the body of the innermost loop.
-  B.SetInsertPoint(BodyBBs[Dims - 1]);
-}
-
-IteratorLoopNestScope::~IteratorLoopNestScope() {
-  if (Dims == 0)
-    return;
-
-  const unsigned Last = Dims - 1;
-
-  // Fallthrough to latch if the innermost body isn't terminated by the user.
-  if (!BodyBBs[Last]->getTerminator()) {
-    llvm::IRBuilderBase::InsertPointGuard g(B);
-    B.SetInsertPoint(BodyBBs[Last]);
-    B.CreateBr(LatchBBs[Last]);
-  }
-
-  // Wire exits in reverse: exit(d) -> latch(d-1).
-  for (unsigned Dimension = Last; Dimension > 0; --Dimension) {
-    if (ExitBBs[Dimension]->getTerminator())
-      continue;
-    llvm::IRBuilderBase::InsertPointGuard g(B);
-    B.SetInsertPoint(ExitBBs[Dimension]);
-    B.CreateBr(LatchBBs[Dimension - 1]);
-  }
-
-  // Continue after the whole nest at outermost exit.
-  B.SetInsertPoint(ExitBBs[0]);
-}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 515ec97b56849..e8bdf4c5aa4dc 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2233,11 +2233,10 @@ class IteratorInfo {
   llvm::SmallVector<llvm::Value *> trips;
   unsigned dims;
   llvm::Value *totalTrips;
-  const mlir::LLVM::ModuleTranslation &moduleTranslation;
-  llvm::IRBuilderBase &builder;
 
-  llvm::Value *lookUpAsI64(mlir::Value val) {
-    llvm::Value *v = moduleTranslation.lookupValue(val);
+  llvm::Value *lookUpAsI64(mlir::Value val, const LLVM::ModuleTranslation &mt,
+                           llvm::IRBuilderBase &builder) {
+    llvm::Value *v = mt.lookupValue(val);
     if (!v)
       return nullptr;
     if (v->getType()->isIntegerTy(64))
@@ -2250,8 +2249,7 @@ class IteratorInfo {
 public:
   IteratorInfo(mlir::omp::IteratorsOp itersOp,
                mlir::LLVM::ModuleTranslation &moduleTranslation,
-               llvm::IRBuilderBase &builder)
-      : moduleTranslation(moduleTranslation), builder(builder) {
+               llvm::IRBuilderBase &builder) {
     dims = itersOp.getLbs().size();
     this->lowerBounds.resize(dims);
     this->upperBounds.resize(dims);
@@ -2259,9 +2257,12 @@ class IteratorInfo {
     this->trips.resize(dims);
 
     for (unsigned d = 0; d < dims; ++d) {
-      llvm::Value *lb = lookUpAsI64(itersOp.getLbs()[d]);
-      llvm::Value *ub = lookUpAsI64(itersOp.getUbs()[d]);
-      llvm::Value *st = lookUpAsI64(itersOp.getSteps()[d]);
+      llvm::Value *lb =
+          lookUpAsI64(itersOp.getLbs()[d], moduleTranslation, builder);
+      llvm::Value *ub =
+          lookUpAsI64(itersOp.getUbs()[d], moduleTranslation, builder);
+      llvm::Value *st =
+          lookUpAsI64(itersOp.getSteps()[d], moduleTranslation, builder);
       assert(lb && ub && st &&
              "Expect lowerBounds, upperBounds, and steps in IteratorsOp");
 
@@ -2286,7 +2287,7 @@ class IteratorInfo {
   llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
   llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
   llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
-  llvm::Value *getTotalTrips() { return totalTrips; }
+  llvm::Value *getTotalTrips() const { return totalTrips; }
 };
 
 } // namespace
@@ -2419,58 +2420,96 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
 }
 
 static mlir::LogicalResult
-buildAffinityIterator(mlir::omp::IteratorsOp itersOp,
-                      llvm::IRBuilderBase &builder,
-                      mlir::LLVM::ModuleTranslation &moduleTranslation,
-                      llvm::OpenMPIRBuilder::AffinityData &A) {
+buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
+                              llvm::IRBuilderBase &builder,
+                              mlir::LLVM::ModuleTranslation &moduleTranslation,
+                              llvm::OpenMPIRBuilder::AffinityData &ad) {
+
   auto &ctx = builder.getContext();
+  auto &ompBuilder = *moduleTranslation.getOpenMPBuilder();
+  IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+
   llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
       llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
       llvm::Type::getInt32Ty(ctx));
+  auto *list = builder.CreateAlloca(
+      kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
 
   mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
-  IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
 
-  auto *list = builder.CreateAlloca(
-      kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
+  llvm::Function *F = builder.GetInsertBlock()->getParent();
+  llvm::BasicBlock *curBB = builder.GetInsertBlock();
+  llvm::Instruction *splitPt = (builder.GetInsertPoint() == curBB->end())
+                                   ? curBB->getTerminator()
+                                   : &*builder.GetInsertPoint();
+  if (!splitPt) {
+    llvm::BasicBlock *tmp = llvm::BasicBlock::Create(ctx, "omp.tmp.cont", F);
+    builder.SetInsertPoint(curBB);
+    builder.CreateBr(tmp);
+    splitPt = curBB->getTerminator();
+  }
 
-  llvm::IteratorLoopNestScope iterLoops(
-      builder, iterInfo.getDims(), iterInfo.getLowerBounds(),
-      iterInfo.getUpperBounds(), iterInfo.getSteps());
-  auto indVars = iterLoops.getIVs();
-  for (unsigned d = 0; d < iterInfo.getDims(); ++d)
-    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), indVars[d]);
+  llvm::BasicBlock *contBB = curBB->splitBasicBlock(splitPt, "omp.task.cont");
+  // Remove the branch to contBB since we will branch to contBB after the loop
+  curBB->getTerminator()->eraseFromParent();
+
+  auto *cli = ompBuilder.createLoopSkeleton(
+      builder.getCurrentDebugLocation(), iterInfo.getTotalTrips(),
+      builder.GetInsertBlock()->getParent(), contBB, contBB);
+  builder.SetInsertPoint(curBB);
+  builder.CreateBr(cli->getPreheader());
+
+  // Remove the unconditional branch inserted by createLoopSkeleton in the body
+  if (llvm::Instruction *T = cli->getBody()->getTerminator())
+    T->eraseFromParent();
+
+  // Start building the loop body
+  builder.SetInsertPoint(cli->getBody());
+
+  llvm::Value *linearIV = cli->getIndVar();
+  for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
+    llvm::Value *trip = iterInfo.getTrips()[d];
+    // idx = linearIV % trips[d]
+    llvm::Value *idx = builder.CreateURem(linearIV, trip);
+    // linearIV = linearIV / trips[d]
+    linearIV = builder.CreateUDiv(linearIV, trip);
+
+    // physicalIV = lb + logical * step.
+    llvm::Value *physicalIV = builder.CreateAdd(
+        iterInfo.getLowerBounds()[d],
+        builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv");
+
+    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physicalIV);
+  }
 
   moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
-  if (mlir::failed(moduleTranslation.convertBlock(
-          iteratorRegionBlock, /*ignoreArguments=*/true, builder)))
+  if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
+                                                  /*ignoreArguments=*/true,
+                                                  builder))) {
     return itersOp.emitOpError() << "failed to translate iterators region";
+  }
 
   auto yield =
       mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
   auto entryOp =
       yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
-
   llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
   llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+  storeAffinityEntry(builder, list, cli->getIndVar(), addr, len);
 
-  llvm::Value *linearIdx = llvm::ConstantInt::get(builder.getInt64Ty(), 0);
-  for (unsigned d = 0; d < iterInfo.getDims(); ++d) {
-    // Normalize the physical IV to a 0-based logical index for this dimension.
-    llvm::Value *logicalIdx = builder.CreateUDiv(
-        builder.CreateSub(indVars[d], iterInfo.getLowerBounds()[d]),
-        iterInfo.getSteps()[d]);
-    // Row-major flattening: linear = linear * Trips[d] + logicalIdx
-    linearIdx = builder.CreateAdd(
-        builder.CreateMul(linearIdx, iterInfo.getTrips()[d]), logicalIdx);
-  }
-
-  storeAffinityEntry(builder, list, linearIdx, addr, len);
+  // Ensure we end the loop body by jumping to the latch
+  if (!builder.GetInsertBlock()->getTerminator())
+    builder.CreateBr(cli->getLatch());
 
   moduleTranslation.forgetMapping(itersOp.getRegion());
 
-  A.Info = list;
-  A.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+  builder.SetInsertPoint(cli->getAfter(), cli->getAfter()->begin());
+  builder.CreateBr(contBB);
+  builder.SetInsertPoint(contBB, contBB->begin());
+
+  ad.Info = list;
+  ad.Count =
+      builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
   return mlir::success();
 }
 
@@ -2588,6 +2627,18 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
           taskOp.getPrivateNeedsBarrier())))
     return llvm::failure();
 
+  llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
+  if (!taskOp.getAffinityVars().empty())
+    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+  else if (!taskOp.getIterated().empty()) {
+    for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
+      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+      if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
+                                               moduleTranslation, ad)))
+        return llvm::failure();
+    }
+  }
+
   // Set up for call to createTask()
   builder.SetInsertPoint(taskStartBlock);
 
@@ -2686,17 +2737,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
                   moduleTranslation, dds);
 
-  llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
-  if (!taskOp.getAffinityVars().empty())
-    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
-  else if (!taskOp.getIterated().empty()) {
-    for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
-      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
-      if (failed(buildAffinityIterator(iterOp, builder, moduleTranslation, ad)))
-        return failure();
-    }
-  }
-
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createTask(

>From 27146898f8754a2c341cd74c90ada86bbe95e877 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 16:23:42 -0600
Subject: [PATCH 4/6] Refactor and support multiple affinity
 registrations for a task

- Support multiple affinity registrations for a task
- Move iterator loop generation logic to OMPIRBuilder
- Extract iterator loop body conversion logic
- Refactor buildAffinityData by hoisting the creation of affinity_list
- IteratorsOp -> IteratorOp
- Add mlir to llvmir test
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  35 ++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  84 ++++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 258 ++++++++++--------
 mlir/test/Target/LLVMIR/openmp-iterator.mlir  | 226 +++++++++++++++
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  33 +++
 5 files changed, 500 insertions(+), 136 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-iterator.mlir

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index deb2535ac843f..fcf7a02a18e88 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1583,7 +1583,7 @@ class OpenMPIRBuilder {
       const LocationDescription &Loc, InsertPointTy AllocaIP,
       BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr,
       Value *IfCondition = nullptr, SmallVector<DependData> Dependencies = {},
-      AffinityData Affinity = {}, bool Mergeable = false,
+      SmallVector<AffinityData> Affinities = {}, bool Mergeable = false,
       Value *EventHandle = nullptr, Value *Priority = nullptr);
 
   /// Generator for the taskgroup construct
@@ -3932,6 +3932,39 @@ class OpenMPIRBuilder {
   LLVM_ABI GlobalVariable *
   getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
                               std::optional<unsigned> AddressSpace = {});
+
+  using IteratorBodyGenTy = llvm::function_ref<llvm::Error(
+      InsertPointTy BodyIP, llvm::Value *LinearIV)>;
+
+  /// Create a canonical iterator loop at the current insertion point.
+  ///
+  /// This helper splits the current block and builds a canonical loop
+  /// using createLoopSkeleton(). The resulting control flow looks like:
+  ///
+  ///   CurBB -> Preheader -> Header -> Body -> Latch -> After -> ContBB
+  ///
+  /// The body of the loop is produced by calling \p BodyGen with the insertion
+  /// point for the loop body and the induction variable.
+  /// Unlike createCanonicalLoop(), this function is intended for \p BodyGen
+  /// that may perform region lowering (e.g., translating MLIR regions) and are
+  /// not guaranteed to preserve the canonical skeleton's body terminator. In
+  /// particular:
+  ///
+  ///  - The skeleton’s unconditional branch from the loop body is removed
+  ///    before invoking \p BodyGen.
+  ///  - \p BodyGen may freely emit instructions and temporarily introduce
+  ///    control flow.
+  ///  - If the loop body does not end with a terminator after \p BodyGen
+  ///    returns, a branch to the latch is inserted to restore canonical form.
+  ///
+  /// \param Loc The location where the iterator modifier was encountered.
+  /// \param TripCount Number of loop iterations.
+  /// \param BodyGen Callback to generate the loop body.
+  /// \param Name Base name used for creating the loop
+  /// \returns The insertion position *after* the iterator loop
+  LLVM_ABI InsertPointOrErrorTy createIteratorLoop(
+      LocationDescription Loc, llvm::Value *TripCount,
+      IteratorBodyGenTy BodyGen, llvm::StringRef Name = "iterator");
 };
 
 /// Class to represented the control flow structure of an OpenMP canonical loop.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 95e7a86e01808..8fbf1b603f1aa 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2434,8 +2434,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
-    SmallVector<DependData> Dependencies, AffinityData Affinity, bool Mergeable,
-    Value *EventHandle, Value *Priority) {
+    SmallVector<DependData> Dependencies, SmallVector<AffinityData> Affinities,
+    bool Mergeable, Value *EventHandle, Value *Priority) {
 
   if (!updateToLocation(Loc))
     return InsertPointTy();
@@ -2481,8 +2481,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
       Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
 
   OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
-                      Affinity, Mergeable, Priority, EventHandle, TaskAllocaBB,
-                      ToBeDeleted](Function &OutlinedFn) mutable {
+                      Affinities, Mergeable, Priority, EventHandle,
+                      TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable {
     // Replace the Stale CI by appropriate RTL function call.
     assert(OutlinedFn.hasOneUse() &&
            "there must be a single user for the outlined function");
@@ -2555,20 +2555,13 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
                       /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
                       /*task_func=*/&OutlinedFn});
 
-    if (Affinity.Count && Affinity.Info) {
+    if (!Affinities.empty()) {
       Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
           OMPRTL___kmpc_omp_reg_task_with_affinity);
-
-      // bitcast to i8*
-      Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
-          Affinity.Info, Builder.getPtrTy(0));
-
-      if (!Affinity.Count->getType()->isIntegerTy(32))
-        Affinity.Count =
-            Builder.CreateTruncOrBitCast(Affinity.Count, Builder.getInt32Ty());
-
-      createRuntimeFunctionCall(
-          RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+      for (const auto &Affinity : Affinities) {
+        createRuntimeFunctionCall(RegAffFn, {Ident, ThreadID, TaskData,
+                                             Affinity.Count, Affinity.Info});
+      }
     }
 
     // Emit detach clause initialization.
@@ -11548,6 +11541,65 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(vfs::FileSystem &VFS,
   loadOffloadInfoMetadata(*M.get());
 }
 
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createIteratorLoop(
+    LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen,
+    llvm::StringRef Name) {
+  IRBuilderBase &B = Builder;
+  B.restoreIP(Loc.IP);
+
+  BasicBlock *CurBB = Builder.GetInsertBlock();
+  assert(CurBB &&
+         "expected a valid insertion block for creating an iterator loop");
+  Function *F = CurBB->getParent();
+  LLVMContext &Ctx = F->getContext();
+
+  // If splitting at end but CurBB has no terminator, make it well-formed first.
+  if (B.GetInsertPoint() == CurBB->end() && !CurBB->getTerminator()) {
+    BasicBlock *TmpCont = BasicBlock::Create(Ctx, "omp.it.tmp.cont", F);
+    B.SetInsertPoint(CurBB);
+    B.CreateBr(TmpCont);
+
+    // The terminator we just inserted is now the end of CurBB. To split after
+    // it, set insertion point to CurBB->end() (which is fine now).
+    B.SetInsertPoint(CurBB->end());
+  }
+
+  BasicBlock *ContBB =
+      CurBB->splitBasicBlock(Builder.GetInsertPoint(), "omp.it.cont");
+  // Remove the branch to contBB since we will branch to contBB after the loop
+  CurBB->getTerminator()->eraseFromParent();
+
+  CanonicalLoopInfo *CLI =
+      createLoopSkeleton(B.getCurrentDebugLocation(), TripCount, F,
+                         /*PreInsertBefore=*/ContBB,
+                         /*PostInsertBefore=*/ContBB, Name);
+
+  // Enter loop from original block.
+  B.SetInsertPoint(CurBB);
+  B.CreateBr(CLI->getPreheader());
+
+  // Remove the unconditional branch inserted by createLoopSkeleton in the body
+  if (Instruction *T = CLI->getBody()->getTerminator())
+    T->eraseFromParent();
+
+  InsertPointTy BodyIP = CLI->getBodyIP();
+  if (llvm::Error Err = BodyGen(BodyIP, CLI->getIndVar()))
+    return Err;
+
+  // Ensure we end the loop body by jumping to the latch
+  if (!CLI->getBody()->getTerminator()) {
+    B.SetInsertPoint(CLI->getBody());
+    B.CreateBr(CLI->getLatch());
+  }
+
+  // Link After -> ContBB
+  B.SetInsertPoint(CLI->getAfter(), CLI->getAfter()->begin());
+  if (!CLI->getAfter()->getTerminator())
+    B.CreateBr(ContBB);
+
+  return InsertPointTy{ContBB, ContBB->begin()};
+}
+
 //===----------------------------------------------------------------------===//
 // OffloadEntriesInfoManager
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index e8bdf4c5aa4dc..5bc29fa271694 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2225,6 +2225,12 @@ class TaskContextStructManager {
   llvm::Type *structTy = nullptr;
 };
 
+/// IteratorInfo extracts and prepares loop bounds information from an
+/// mlir::omp::IteratorOp for lowering to LLVM IR.
+///
+/// It computes the per-dimension trip counts and the total linearized trip
+/// count, casted to i64. These are used to build a canonical loop and to
+/// reconstruct the physical induction variables inside the loop body.
 class IteratorInfo {
 private:
   llvm::SmallVector<llvm::Value *> lowerBounds;
@@ -2242,29 +2248,29 @@ class IteratorInfo {
     if (v->getType()->isIntegerTy(64))
       return v;
     if (v->getType()->isIntegerTy())
-      return builder.CreateZExtOrTrunc(v, builder.getInt64Ty());
+      return builder.CreateSExtOrTrunc(v, builder.getInt64Ty());
     return nullptr;
   }
 
 public:
-  IteratorInfo(mlir::omp::IteratorsOp itersOp,
+  IteratorInfo(mlir::omp::IteratorOp itersOp,
                mlir::LLVM::ModuleTranslation &moduleTranslation,
                llvm::IRBuilderBase &builder) {
-    dims = itersOp.getLbs().size();
+    dims = itersOp.getLoopLowerBounds().size();
     this->lowerBounds.resize(dims);
     this->upperBounds.resize(dims);
     this->steps.resize(dims);
     this->trips.resize(dims);
 
     for (unsigned d = 0; d < dims; ++d) {
-      llvm::Value *lb =
-          lookUpAsI64(itersOp.getLbs()[d], moduleTranslation, builder);
-      llvm::Value *ub =
-          lookUpAsI64(itersOp.getUbs()[d], moduleTranslation, builder);
+      llvm::Value *lb = lookUpAsI64(itersOp.getLoopLowerBounds()[d],
+                                    moduleTranslation, builder);
+      llvm::Value *ub = lookUpAsI64(itersOp.getLoopUpperBounds()[d],
+                                    moduleTranslation, builder);
       llvm::Value *st =
-          lookUpAsI64(itersOp.getSteps()[d], moduleTranslation, builder);
+          lookUpAsI64(itersOp.getLoopSteps()[d], moduleTranslation, builder);
       assert(lb && ub && st &&
-             "Expect lowerBounds, upperBounds, and steps in IteratorsOp");
+             "Expect lowerBounds, upperBounds, and steps in IteratorOp");
 
       this->lowerBounds[d] = lb;
       this->upperBounds[d] = ub;
@@ -2272,7 +2278,7 @@ class IteratorInfo {
 
       // trips = ((ub - lb) / step) + 1  (inclusive ub, assume positive step)
       llvm::Value *diff = builder.CreateSub(ub, lb);
-      llvm::Value *div = builder.CreateUDiv(diff, st);
+      llvm::Value *div = builder.CreateSDiv(diff, st);
       this->trips[d] = builder.CreateAdd(
           div, llvm::ConstantInt::get(builder.getInt64Ty(), 1));
     }
@@ -2387,129 +2393,151 @@ static void storeAffinityEntry(llvm::IRBuilderBase &builder,
                       builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2));
 }
 
-static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
-                                  llvm::IRBuilderBase &builder,
-                                  LLVM::ModuleTranslation &moduleTranslation,
-                                  llvm::OpenMPIRBuilder::AffinityData &ad) {
-  auto &ctx = builder.getContext();
-  SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
-                                        taskOp.getAffinityVars().end());
-
-  // Define the type locally just for the Alloca
-  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
-      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
-      llvm::Type::getInt32Ty(ctx));
-
-  llvm::Value *count =
-      llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
-  llvm::AllocaInst *affinityList =
-      builder.CreateAlloca(kmpTaskAffinityInfoTy, count, "omp.affinity_list");
-
-  for (unsigned i = 0; i < affinityVars.size(); ++i) {
-    auto entryOp = affinityVars[i].getDefiningOp<mlir::omp::AffinityEntryOp>();
+static void fillAffinityLocators(Operation::operand_range affinityVars,
+                                 llvm::IRBuilderBase &builder,
+                                 LLVM::ModuleTranslation &moduleTranslation,
+                                 llvm::Value *affinityList) {
+  for (auto [i, affinityVar] : llvm::enumerate(affinityVars)) {
+    auto entryOp = affinityVar.getDefiningOp<mlir::omp::AffinityEntryOp>();
     assert(entryOp && "affinity item must be omp.affinity_entry");
 
     llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
-    assert(addr && "expect affinity addr to be non-null");
     llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+    assert(addr && len && "expect affinity addr and len to be non-null");
     storeAffinityEntry(builder, affinityList, builder.getInt64(i), addr, len);
   }
-
-  ad.Info = affinityList;
-  ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
 }
 
 static mlir::LogicalResult
-buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
-                              llvm::IRBuilderBase &builder,
-                              mlir::LLVM::ModuleTranslation &moduleTranslation,
-                              llvm::OpenMPIRBuilder::AffinityData &ad) {
-
-  auto &ctx = builder.getContext();
-  auto &ompBuilder = *moduleTranslation.getOpenMPBuilder();
-  IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
-
-  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
-      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
-      llvm::Type::getInt32Ty(ctx));
-  auto *list = builder.CreateAlloca(
-      kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
-
-  mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
-
-  llvm::Function *F = builder.GetInsertBlock()->getParent();
-  llvm::BasicBlock *curBB = builder.GetInsertBlock();
-  llvm::Instruction *splitPt = (builder.GetInsertPoint() == curBB->end())
-                                   ? curBB->getTerminator()
-                                   : &*builder.GetInsertPoint();
-  if (!splitPt) {
-    llvm::BasicBlock *tmp = llvm::BasicBlock::Create(ctx, "omp.tmp.cont", F);
-    builder.SetInsertPoint(curBB);
-    builder.CreateBr(tmp);
-    splitPt = curBB->getTerminator();
-  }
-
-  llvm::BasicBlock *contBB = curBB->splitBasicBlock(splitPt, "omp.task.cont");
-  // Remove the branch to contBB since we will branch to contBB after the loop
-  curBB->getTerminator()->eraseFromParent();
-
-  auto *cli = ompBuilder.createLoopSkeleton(
-      builder.getCurrentDebugLocation(), iterInfo.getTotalTrips(),
-      builder.GetInsertBlock()->getParent(), contBB, contBB);
-  builder.SetInsertPoint(curBB);
-  builder.CreateBr(cli->getPreheader());
-
-  // Remove the unconditional branch inserted by createLoopSkeleton in the body
-  if (llvm::Instruction *T = cli->getBody()->getTerminator())
-    T->eraseFromParent();
-
-  // Start building the loop body
-  builder.SetInsertPoint(cli->getBody());
-
-  llvm::Value *linearIV = cli->getIndVar();
+convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
+                      mlir::Block &iteratorRegionBlock,
+                      llvm::IRBuilderBase &builder,
+                      LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::Value *tmp = linearIV;
   for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
     llvm::Value *trip = iterInfo.getTrips()[d];
-    // idx = linearIV % trips[d]
-    llvm::Value *idx = builder.CreateURem(linearIV, trip);
-    // linearIV = linearIV / trips[d]
-    linearIV = builder.CreateUDiv(linearIV, trip);
+    // idx_d = tmp % trip_d
+    llvm::Value *idx = builder.CreateURem(tmp, trip);
+    // tmp = tmp / trip_d
+    tmp = builder.CreateUDiv(tmp, trip);
 
-    // physicalIV = lb + logical * step.
-    llvm::Value *physicalIV = builder.CreateAdd(
+    // physIV_d = lb_d + idx_d * step_d
+    llvm::Value *physIV = builder.CreateAdd(
         iterInfo.getLowerBounds()[d],
         builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv");
 
-    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physicalIV);
+    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV);
   }
 
+  // Translate the iterator region into the loop body.
   moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
   if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
                                                   /*ignoreArguments=*/true,
                                                   builder))) {
-    return itersOp.emitOpError() << "failed to translate iterators region";
+    return mlir::failure();
   }
+  return mlir::success();
+}
+
+static mlir::LogicalResult
+fillAffinityIteratorLoop(mlir::omp::IteratorOp itersOp,
+                         llvm::IRBuilderBase &builder,
+                         mlir::LLVM::ModuleTranslation &moduleTranslation,
+                         llvm::Value *affinityList, IteratorInfo &iterInfo) {
+  mlir::Region &itersRegion = itersOp.getRegion();
+  mlir::Block &iteratorRegionBlock = itersRegion.front();
+
+  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
+
+  // Build the iterator loop using the new OMPIRBuilder helper.
+  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy bodyIP,
+                     llvm::Value *linearIV) -> llvm::Error {
+    llvm::IRBuilderBase::InsertPointGuard g(builder);
+    builder.restoreIP(bodyIP);
+
+    if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock,
+                                     builder, moduleTranslation))) {
+      return llvm::make_error<llvm::StringError>(
+          "failed to convert iterator region", llvm::inconvertibleErrorCode());
+    }
+
+    // Extract affinity entry from omp.yield and store into list[linearIV].
+    auto yield =
+        mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
+    assert(yield.getResults().size() == 1 &&
+           "expect omp.yield in iterator region to have one result");
+    auto entryOp =
+        yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
+
+    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+    storeAffinityEntry(builder, affinityList, linearIV, addr, len);
+
+    // Avoid leaking region mappings if this iterator loop is reused/expanded.
+    moduleTranslation.forgetMapping(itersRegion);
+
+    return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createIteratorLoop(
+          loc, iterInfo.getTotalTrips(), bodyGen,
+          /*Name=*/"iterator");
+  if (failed(handleError(afterIP, *itersOp)))
+    return failure();
+
+  builder.restoreIP(*afterIP);
+
+  return mlir::success();
+}
 
-  auto yield =
-      mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
-  auto entryOp =
-      yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
-  llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
-  llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
-  storeAffinityEntry(builder, list, cli->getIndVar(), addr, len);
+static mlir::LogicalResult buildAffinityData(
+    mlir::omp::TaskOp &taskOp, llvm::IRBuilderBase &builder,
+    mlir::LLVM::ModuleTranslation &moduleTranslation,
+    llvm::SmallVectorImpl<llvm::OpenMPIRBuilder::AffinityData> &ads) {
+  auto &ctx = builder.getContext();
+  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+      llvm::Type::getInt32Ty(ctx));
 
-  // Ensure we end the loop body by jumping to the latch
-  if (!builder.GetInsertBlock()->getTerminator())
-    builder.CreateBr(cli->getLatch());
+  auto allocateAffinityList = [&](llvm::Value *count) -> llvm::Value * {
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetInsertPointPastAllocas(builder.GetInsertBlock()->getParent());
+    return builder.CreateAlloca(kmpTaskAffinityInfoTy, count,
+                                "omp.affinity_list");
+  };
 
-  moduleTranslation.forgetMapping(itersOp.getRegion());
+  auto createAffinity =
+      [&](llvm::Value *count,
+          llvm::Value *info) -> llvm::OpenMPIRBuilder::AffinityData {
+    llvm::OpenMPIRBuilder::AffinityData ad{};
+    ad.Count = builder.CreateTrunc(count, builder.getInt32Ty());
+    ad.Info =
+        builder.CreatePointerBitCastOrAddrSpaceCast(info, builder.getPtrTy(0));
+    return ad;
+  };
 
-  builder.SetInsertPoint(cli->getAfter(), cli->getAfter()->begin());
-  builder.CreateBr(contBB);
-  builder.SetInsertPoint(contBB, contBB->begin());
+  if (!taskOp.getAffinityVars().empty()) {
+    llvm::Value *count = llvm::ConstantInt::get(
+        builder.getInt64Ty(), taskOp.getAffinityVars().size());
+    llvm::Value *list = allocateAffinityList(count);
+    fillAffinityLocators(taskOp.getAffinityVars(), builder, moduleTranslation,
+                         list);
+    ads.emplace_back(createAffinity(count, list));
+  }
 
-  ad.Info = list;
-  ad.Count =
-      builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+  if (!taskOp.getIterated().empty()) {
+    for (auto [i, iter] : llvm::enumerate(taskOp.getIterated())) {
+      auto itersOp = iter.getDefiningOp<omp::IteratorOp>();
+      assert(itersOp && "iterated value must be defined by omp.iterator");
+      IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+      llvm::Value *affList = allocateAffinityList(iterInfo.getTotalTrips());
+      if (failed(fillAffinityIteratorLoop(itersOp, builder, moduleTranslation,
+                                          affList, iterInfo)))
+        return llvm::failure();
+      ads.emplace_back(createAffinity(iterInfo.getTotalTrips(), affList));
+    }
+  }
   return mlir::success();
 }
 
@@ -2627,17 +2655,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
           taskOp.getPrivateNeedsBarrier())))
     return llvm::failure();
 
-  llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
-  if (!taskOp.getAffinityVars().empty())
-    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
-  else if (!taskOp.getIterated().empty()) {
-    for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
-      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
-      if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
-                                               moduleTranslation, ad)))
-        return llvm::failure();
-    }
-  }
+  llvm::SmallVector<llvm::OpenMPIRBuilder::AffinityData> ads;
+  if (failed(buildAffinityData(taskOp, builder, moduleTranslation, ads)))
+    return llvm::failure();
 
   // Set up for call to createTask()
   builder.SetInsertPoint(taskStartBlock);
@@ -2742,7 +2762,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
       moduleTranslation.getOpenMPBuilder()->createTask(
           ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
           moduleTranslation.lookupValue(taskOp.getFinal()),
-          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad,
+          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ads,
           taskOp.getMergeable(),
           moduleTranslation.lookupValue(taskOp.getEventHandle()),
           moduleTranslation.lookupValue(taskOp.getPriority()));
@@ -7524,7 +7544,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
             return convertOmpLoopNest(*op, builder, moduleTranslation);
           })
           .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
-                omp::AffinityEntryOp, omp::IteratorsOp>([&](auto op) {
+                omp::AffinityEntryOp, omp::IteratorOp>([&](auto op) {
             // No-op, should be handled by relevant owning operations e.g.
             // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
             // etc. and then discarded
diff --git a/mlir/test/Target/LLVMIR/openmp-iterator.mlir b/mlir/test/Target/LLVMIR/openmp-iterator.mlir
new file mode 100644
index 0000000000000..1c3647c5bb42b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-iterator.mlir
@@ -0,0 +1,226 @@
+// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @task_affinity_iterator_1d(%arr: !llvm.ptr {llvm.nocapture}) {
+  %c1  = llvm.mlir.constant(1 : i64) : i64
+  %c4  = llvm.mlir.constant(4 : i64) : i64
+  %c6  = llvm.mlir.constant(6 : i64) : i64
+  %len = llvm.mlir.constant(4 : i64) : i64
+
+  omp.parallel {
+    omp.single {
+      %it = omp.iterator(%i: i64, %j: i64) =
+          (%c1 to %c4 step %c1, %c1 to %c6 step %c1) {
+        %entry = omp.affinity_entry %arr, %len
+            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+        omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+      omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+        omp.terminator
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_1d
+
+// Preheader -> Header
+// CHECK: omp_iterator.preheader:
+// CHECK: br label %omp_iterator.header
+//
+// Header has the IV phi and branches to cond
+// CHECK: omp_iterator.header:
+// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ]
+// CHECK: br label %omp_iterator.cond
+//
+// Cond: IV < 24 and branches to body or exit
+// CHECK: omp_iterator.cond:
+// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 24
+// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
+//
+// Exit -> After -> continuation
+// CHECK: omp_iterator.exit:
+// CHECK: br label %omp_iterator.after
+// CHECK: omp_iterator.after:
+// CHECK: br label %omp.it.cont
+//
+// Body: store into affinity_list[IV] then branch to inc
+// CHECK: omp_iterator.body:
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list, i64 [[IV]]
+// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
+// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
+// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
+// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP]]
+// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP]]
+// CHECK: br label %omp_iterator.inc
+//
+// CHECK: omp_iterator.inc:
+// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1
+// CHECK: br label %omp_iterator.header
+
+llvm.func @task_affinity_iterator_3d(%arr: !llvm.ptr {llvm.nocapture}) {
+  %c1  = llvm.mlir.constant(1 : i64) : i64
+  %c2  = llvm.mlir.constant(2 : i64) : i64
+  %c4  = llvm.mlir.constant(4 : i64) : i64
+  %c6  = llvm.mlir.constant(6 : i64) : i64
+  %len = llvm.mlir.constant(4 : i64) : i64
+
+  omp.parallel {
+    omp.single {
+      // 3-D iterator: i=1..4, j=1..6, k=1..2 => total trips = 48
+      %it = omp.iterator(%i: i64, %j: i64, %k: i64) =
+          (%c1 to %c4 step %c1, %c1 to %c6 step %c1, %c1 to %c2 step %c1) {
+        %entry = omp.affinity_entry %arr, %len
+            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+        omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+      omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+        omp.terminator
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_3d
+
+// Preheader -> Header
+// CHECK: omp_iterator.preheader:
+// CHECK: br label %omp_iterator.header
+//
+// Header has the IV phi and branches to cond
+// CHECK: omp_iterator.header:
+// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ]
+// CHECK: br label %omp_iterator.cond
+//
+// Cond: IV < 48 and branches to body or exit
+// CHECK: omp_iterator.cond:
+// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 48
+// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
+//
+// Exit -> After -> continuation
+// CHECK: omp_iterator.exit:
+// CHECK: br label %omp_iterator.after
+// CHECK: omp_iterator.after:
+// CHECK: br label %omp.it.cont
+//
+// Body: store into affinity_list[IV] then branch to inc
+// CHECK: omp_iterator.body:
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list, i64 [[IV]]
+// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
+// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
+// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
+// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP]]
+// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP]]
+// CHECK: br label %omp_iterator.inc
+//
+// CHECK: omp_iterator.inc:
+// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1
+// CHECK: br label %omp_iterator.header
+
+llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
+  %c1  = llvm.mlir.constant(1 : i64) : i64
+  %c3  = llvm.mlir.constant(3 : i64) : i64
+  %c4  = llvm.mlir.constant(4 : i64) : i64
+  %c6  = llvm.mlir.constant(6 : i64) : i64
+  %len = llvm.mlir.constant(4 : i64) : i64
+
+  omp.parallel {
+    omp.single {
+      // First iterator: 2-D (4 * 6 = 24)
+      %it0 = omp.iterator(%i: i64, %j: i64) =
+          (%c1 to %c4 step %c1, %c1 to %c6 step %c1) {
+        %entry0 = omp.affinity_entry %arr, %len
+            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+        omp.yield(%entry0 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+      // Second iterator: 1-D (3)
+      %it1 = omp.iterator(%k: i64) = (%c1 to %c3 step %c1) {
+        %entry1 = omp.affinity_entry %arr, %len
+            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+        omp.yield(%entry1 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+      // Multiple iterators in a single affinity clause.
+      omp.task affinity(%it0: !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>,
+            %it1: !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+        omp.terminator
+      }
+
+      omp.terminator
+    }
+    omp.terminator
+  }
+
+  llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_multiple..omp_par.3(
+// CHECK-DAG: %gep_omp.affinity_list = getelementptr { ptr, ptr, ptr }, ptr %0, i32 0, i32 0
+// CHECK-DAG: %gep_omp.affinity_list{{.*}} = getelementptr { ptr, ptr, ptr }, ptr %0, i32 0, i32 1
+
+// First iterator header
+// CHECK: omp_iterator.preheader:
+// CHECK: br label %[[HEADER0:.+]]
+// CHECK: [[HEADER0]]:
+// CHECK: [[IV0:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT0:%.*]], %[[INC0:.+]] ]
+// CHECK: br label %[[COND0:.+]]
+// CHECK: [[COND0]]:
+// CHECK: [[CMP0:%.*]] = icmp ult i64 [[IV0]], 24
+// CHECK: br i1 [[CMP0]], label %[[BODY0:.+]], label %omp_iterator.exit
+
+// Second iterator header
+// CHECK: omp_iterator.preheader{{.*}}:
+// CHECK: [[HEADER1:.+]]:
+// CHECK: [[IV1:%.*]] = phi i64 [ 0, %omp_iterator.preheader{{.*}} ], [ [[NEXT1:%.*]], %[[INC1:.+]] ]
+// CHECK: br label %omp_iterator.cond{{.*}}
+// CHECK: omp_iterator.cond{{.*}}:
+// CHECK: [[CMP1:%.*]] = icmp ult i64 [[IV1]], 3
+// CHECK: br i1 [[CMP1]], label %[[BODY1:.+]], label %omp_iterator.exit{{.*}}
+
+// CHECK: codeRepl:
+// CHECK: call ptr @__kmpc_omp_task_alloc
+// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 24{{.*}}ptr %loadgep_omp.affinity_list
+// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 3{{.*}}ptr %loadgep_omp.affinity_list{{.*}}
+// CHECK: call i32 @__kmpc_omp_task
+
+// Second iterator body
+// CHECK: [[BODY1]]:
+// CHECK: [[ENTRY1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list
+// CHECK: [[ADDR1:%.*]] = ptrtoint ptr %loadgep_ to i64
+// CHECK: [[ADDRGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 0
+// CHECK: store i64 [[ADDR1]], ptr [[ADDRGEP1]]
+// CHECK: [[LENGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP1]]
+// CHECK: [[FLAGGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP1]]
+// CHECK: br label %[[INC1]]
+// CHECK: [[INC1]]:
+// CHECK: [[NEXT1]] = add nuw i64 [[IV1]], 1
+// CHECK: br label %[[HEADER1]]
+
+// First iterator body
+// CHECK: [[BODY0]]:
+// CHECK: [[ENTRY0:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list, i64 [[IV0]]
+// CHECK: [[ADDR0:%.*]] = ptrtoint ptr %loadgep_ to i64
+// CHECK: [[ADDRGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 0
+// CHECK: store i64 [[ADDR0]], ptr [[ADDRGEP0]]
+// CHECK: [[LENGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP0]]
+// CHECK: [[FLAGGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP0]]
+// CHECK: br label %[[INC0]]
+// CHECK: [[INC0]]:
+// CHECK: [[NEXT0]] = add nuw i64 [[IV0]], 1
+// CHECK: br label %[[HEADER0]]
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index fcb937dbc1867..2a55cc351160c 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3589,3 +3589,36 @@ llvm.func @nested_task_with_deps() {
 
 // CHECK:         ret void
 // CHECK:       }
+
+llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
+  %len = llvm.mlir.constant(4 : i64) : i64
+
+  omp.parallel {
+    omp.single {
+      %ae = omp.affinity_entry %arr, %len
+        : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+
+      omp.task affinity(%ae : !omp.affinity_entry_ty<!llvm.ptr, i64>) {
+        omp.terminator
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_plain
+// CHECK: [[BASE:%.*]] = load ptr, ptr %gep_, align 8
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list, i64 0
+// addr
+// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr [[BASE]] to i64
+// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
+// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
+// len
+// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP]]
+// flags is always 0
+// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP]]
+// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 1, ptr %loadgep_omp.affinity_list

>From 87211e26d3d3792500d46326e9933c013544973c Mon Sep 17 00:00:00 2001
From: "Chi Chun, Chen" <chichun.chen at hpe.com>
Date: Thu, 5 Mar 2026 13:31:37 -0600
Subject: [PATCH 5/6] Fix insert point for affinity list

Fix a dominance issue that occurs when the affinity list is created
before the dynamic trip count is computed.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  8 ++--
 mlir/test/Target/LLVMIR/openmp-iterator.mlir  | 45 +++++++++++++++----
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  5 ++-
 3 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 5bc29fa271694..2d90fdaf33eda 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2449,10 +2449,9 @@ fillAffinityIteratorLoop(mlir::omp::IteratorOp itersOp,
 
   llvm::OpenMPIRBuilder::LocationDescription loc(builder);
 
-  // Build the iterator loop using the new OMPIRBuilder helper.
   auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy bodyIP,
                      llvm::Value *linearIV) -> llvm::Error {
-    llvm::IRBuilderBase::InsertPointGuard g(builder);
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
     builder.restoreIP(bodyIP);
 
     if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock,
@@ -2473,7 +2472,8 @@ fillAffinityIteratorLoop(mlir::omp::IteratorOp itersOp,
     llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
     storeAffinityEntry(builder, affinityList, linearIV, addr, len);
 
-    // Avoid leaking region mappings if this iterator loop is reused/expanded.
+    // Iterator-region block/value mappings are temporary for this conversion;
+    // clear them to avoid stale entries in ModuleTranslation.
     moduleTranslation.forgetMapping(itersRegion);
 
     return llvm::Error::success();
@@ -2501,8 +2501,6 @@ static mlir::LogicalResult buildAffinityData(
       llvm::Type::getInt32Ty(ctx));
 
   auto allocateAffinityList = [&](llvm::Value *count) -> llvm::Value * {
-    llvm::IRBuilderBase::InsertPointGuard guard(builder);
-    builder.SetInsertPointPastAllocas(builder.GetInsertBlock()->getParent());
     return builder.CreateAlloca(kmpTaskAffinityInfoTy, count,
                                 "omp.affinity_list");
   };
diff --git a/mlir/test/Target/LLVMIR/openmp-iterator.mlir b/mlir/test/Target/LLVMIR/openmp-iterator.mlir
index 1c3647c5bb42b..0163d5539b76e 100644
--- a/mlir/test/Target/LLVMIR/openmp-iterator.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-iterator.mlir
@@ -49,7 +49,7 @@ llvm.func @task_affinity_iterator_1d(%arr: !llvm.ptr {llvm.nocapture}) {
 //
 // Body: store into affinity_list[IV] then branch to inc
 // CHECK: omp_iterator.body:
-// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list, i64 [[IV]]
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]]
 // CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
 // CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
 // CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
@@ -114,7 +114,7 @@ llvm.func @task_affinity_iterator_3d(%arr: !llvm.ptr {llvm.nocapture}) {
 //
 // Body: store into affinity_list[IV] then branch to inc
 // CHECK: omp_iterator.body:
-// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list, i64 [[IV]]
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]]
 // CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
 // CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
 // CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
@@ -167,8 +167,7 @@ llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
 }
 
 // CHECK-LABEL: define internal void @task_affinity_iterator_multiple..omp_par.3(
-// CHECK-DAG: %gep_omp.affinity_list = getelementptr { ptr, ptr, ptr }, ptr %0, i32 0, i32 0
-// CHECK-DAG: %gep_omp.affinity_list{{.*}} = getelementptr { ptr, ptr, ptr }, ptr %0, i32 0, i32 1
+// CHECK: [[AFFLIST0:%.*]] = alloca { i64, i64, i32 }, i64 24, align 8
 
 // First iterator header
 // CHECK: omp_iterator.preheader:
@@ -181,6 +180,7 @@ llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
 // CHECK: br i1 [[CMP0]], label %[[BODY0:.+]], label %omp_iterator.exit
 
 // Second iterator header
+// CHECK: [[AFFLIST1:%.*]] = alloca { i64, i64, i32 }, i64 3, align 8
 // CHECK: omp_iterator.preheader{{.*}}:
 // CHECK: [[HEADER1:.+]]:
 // CHECK: [[IV1:%.*]] = phi i64 [ 0, %omp_iterator.preheader{{.*}} ], [ [[NEXT1:%.*]], %[[INC1:.+]] ]
@@ -191,13 +191,13 @@ llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
 
 // CHECK: codeRepl:
 // CHECK: call ptr @__kmpc_omp_task_alloc
-// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 24{{.*}}ptr %loadgep_omp.affinity_list
-// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 3{{.*}}ptr %loadgep_omp.affinity_list{{.*}}
+// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 24{{.*}}ptr [[AFFLIST0]]
+// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 3{{.*}}ptr [[AFFLIST1]]
 // CHECK: call i32 @__kmpc_omp_task
 
 // Second iterator body
 // CHECK: [[BODY1]]:
-// CHECK: [[ENTRY1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list
+// CHECK: [[ENTRY1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST1]]
 // CHECK: [[ADDR1:%.*]] = ptrtoint ptr %loadgep_ to i64
 // CHECK: [[ADDRGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 0
 // CHECK: store i64 [[ADDR1]], ptr [[ADDRGEP1]]
@@ -212,7 +212,7 @@ llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
 
 // First iterator body
 // CHECK: [[BODY0]]:
-// CHECK: [[ENTRY0:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list, i64 [[IV0]]
+// CHECK: [[ENTRY0:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST0]], i64 [[IV0]]
 // CHECK: [[ADDR0:%.*]] = ptrtoint ptr %loadgep_ to i64
 // CHECK: [[ADDRGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 0
 // CHECK: store i64 [[ADDR0]], ptr [[ADDRGEP0]]
@@ -224,3 +224,32 @@ llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
 // CHECK: [[INC0]]:
 // CHECK: [[NEXT0]] = add nuw i64 [[IV0]], 1
 // CHECK: br label %[[HEADER0]]
+
+// Make sure the affinity list is only allocated after the dynamic trip count is computed.
+llvm.func @task_affinity_iterator_dynamic_tripcount(
+    %arr: !llvm.ptr {llvm.nocapture}, %lb: i64, %ub: i64, %step: i64,
+    %len: i64) {
+  omp.parallel {
+    omp.single {
+      %it = omp.iterator(%i: i64) = (%lb to %ub step %step) {
+        %entry = omp.affinity_entry %arr, %len
+            : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+        omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+      } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+      omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+        omp.terminator
+      }
+      omp.terminator
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_dynamic_tripcount
+// CHECK: [[DIFF:%.*]] = sub i64 {{.*}}, {{.*}}
+// CHECK: [[DIV:%.*]] = sdiv i64 [[DIFF]], {{.*}}
+// CHECK: [[TRIPS:%.*]] = add i64 [[DIV]], 1
+// CHECK: [[SCALED:%.*]] = mul i64 1, [[TRIPS]]
+// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 [[SCALED]]
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 2a55cc351160c..c5cdecd091770 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3610,7 +3610,8 @@ llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
 
 // CHECK-LABEL: define internal void @task_affinity_plain
 // CHECK: [[BASE:%.*]] = load ptr, ptr %gep_, align 8
-// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %loadgep_omp.affinity_list, i64 0
+// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 1, align 8
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST]], i64 0
 // addr
 // CHECK: [[ADDRI64:%.*]] = ptrtoint ptr [[BASE]] to i64
 // CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
@@ -3621,4 +3622,4 @@ llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
 // flags is always 0
 // CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
 // CHECK: store i32 0, ptr [[FLAGGEP]]
-// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 1, ptr %loadgep_omp.affinity_list
+// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 1, ptr [[AFFLIST]]

>From b54a8d27b6294695197f942ffaaac629a9be4f6f Mon Sep 17 00:00:00 2001
From: "Chi Chun, Chen" <chichun.chen at hpe.com>
Date: Thu, 5 Mar 2026 14:26:06 -0600
Subject: [PATCH 6/6] Refactor createIteratorLoop to use OMPIRBuilder utility
 functions and make end-of-block insertion robust.

- Replace manual splitBasicBlock/branch with splitBB
  and redirectTo()
- When insertion point is at BB.end() and the block is terminated, split
  before the terminator so the original successor path is preserved
  through omp.it.cont
- Add a unit test covering both unterminated and terminated insertion blocks
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 36 ++++------
 .../Frontend/OpenMPIRBuilderTest.cpp          | 66 +++++++++++++++++++
 2 files changed, 79 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8fbf1b603f1aa..5736d7d9d1ab3 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11544,39 +11544,29 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(vfs::FileSystem &VFS,
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createIteratorLoop(
     LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen,
     llvm::StringRef Name) {
-  IRBuilderBase &B = Builder;
-  B.restoreIP(Loc.IP);
+  Builder.restoreIP(Loc.IP);
 
   BasicBlock *CurBB = Builder.GetInsertBlock();
   assert(CurBB &&
          "expected a valid insertion block for creating an iterator loop");
   Function *F = CurBB->getParent();
-  LLVMContext &Ctx = F->getContext();
-
-  // If splitting at end but CurBB has no terminator, make it well-formed first.
-  if (B.GetInsertPoint() == CurBB->end() && !CurBB->getTerminator()) {
-    BasicBlock *TmpCont = BasicBlock::Create(Ctx, "omp.it.tmp.cont", F);
-    B.SetInsertPoint(CurBB);
-    B.CreateBr(TmpCont);
 
-    // The terminator we just inserted is now the end of CurBB. To split after
-    // it, set insertion point to CurBB->end() (which is fine now).
-    B.SetInsertPoint(CurBB->end());
-  }
+  InsertPointTy SplitIP = Builder.saveIP();
+  if (SplitIP.getPoint() == CurBB->end())
+    if (Instruction *Terminator = CurBB->getTerminator())
+      SplitIP = InsertPointTy(CurBB, Terminator->getIterator());
 
   BasicBlock *ContBB =
-      CurBB->splitBasicBlock(Builder.GetInsertPoint(), "omp.it.cont");
-  // Remove the branch to contBB since we will branch to contBB after the loop
-  CurBB->getTerminator()->eraseFromParent();
+      splitBB(SplitIP, /*CreateBranch=*/false,
+              Builder.getCurrentDebugLocation(), "omp.it.cont");
 
   CanonicalLoopInfo *CLI =
-      createLoopSkeleton(B.getCurrentDebugLocation(), TripCount, F,
+      createLoopSkeleton(Builder.getCurrentDebugLocation(), TripCount, F,
                          /*PreInsertBefore=*/ContBB,
                          /*PostInsertBefore=*/ContBB, Name);
 
   // Enter loop from original block.
-  B.SetInsertPoint(CurBB);
-  B.CreateBr(CLI->getPreheader());
+  redirectTo(CurBB, CLI->getPreheader(), Builder.getCurrentDebugLocation());
 
   // Remove the unconditional branch inserted by createLoopSkeleton in the body
   if (Instruction *T = CLI->getBody()->getTerminator())
@@ -11588,14 +11578,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createIteratorLoop(
 
   // Ensure we end the loop body by jumping to the latch
   if (!CLI->getBody()->getTerminator()) {
-    B.SetInsertPoint(CLI->getBody());
-    B.CreateBr(CLI->getLatch());
+    Builder.SetInsertPoint(CLI->getBody());
+    Builder.CreateBr(CLI->getLatch());
   }
 
   // Link After -> ContBB
-  B.SetInsertPoint(CLI->getAfter(), CLI->getAfter()->begin());
+  Builder.SetInsertPoint(CLI->getAfter(), CLI->getAfter()->begin());
   if (!CLI->getAfter()->getTerminator())
-    B.CreateBr(ContBB);
+    Builder.CreateBr(ContBB);
 
   return InsertPointTy{ContBB, ContBB->begin()};
 }
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 8292b055f6cd9..cc19ccc6a3b39 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -7647,6 +7647,72 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
   EXPECT_TRUE(RegAffCI->getArgOperand(4)->getType()->isPointerTy());
 }
 
+TEST_F(OpenMPIRBuilderTest, CreateIteratorLoop) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  {
+    OpenMPIRBuilder OMPBuilder(*M);
+    OMPBuilder.initialize();
+    F->setName("func.unterminated");
+    IRBuilder<> Builder(BB);
+
+    auto BodyGenCB = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error {
+      Builder.restoreIP(BodyIP);
+      Builder.CreateAdd(LinearIV, Builder.getInt64(1));
+      return Error::success();
+    };
+
+    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
+    ASSERT_EXPECTED_INIT(InsertPointTy, AfterIP,
+                         OMPBuilder.createIteratorLoop(Loc, Builder.getInt64(4),
+                                                       BodyGenCB, "iterator"));
+
+    Builder.restoreIP(AfterIP);
+    Builder.CreateRetVoid();
+
+    EXPECT_EQ(AfterIP.getBlock()->getName(), "omp.it.cont");
+    EXPECT_FALSE(verifyFunction(*F, &errs()));
+  }
+
+  {
+    Function *F2 =
+        Function::Create(F->getFunctionType(), Function::ExternalLinkage,
+                         "func.terminated", M.get());
+    BasicBlock *BB2 = BasicBlock::Create(Ctx, "", F2);
+    OpenMPIRBuilder OMPBuilder(*M);
+    OMPBuilder.initialize();
+    IRBuilder<> Builder(BB2);
+
+    BasicBlock *OrigSucc =
+        BasicBlock::Create(Builder.getContext(), "orig.succ", F2);
+    Builder.CreateBr(OrigSucc);
+
+    auto BodyGenCB = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error {
+      Builder.restoreIP(BodyIP);
+      Builder.CreateAdd(LinearIV, Builder.getInt64(1));
+      return Error::success();
+    };
+
+    OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB2, BB2->end()),
+                                             DL);
+    ASSERT_EXPECTED_INIT(InsertPointTy, AfterIP,
+                         OMPBuilder.createIteratorLoop(Loc, Builder.getInt64(4),
+                                                       BodyGenCB, "iterator"));
+
+    EXPECT_EQ(AfterIP.getBlock()->getName(), "omp.it.cont");
+    auto *ContBr = dyn_cast<BranchInst>(AfterIP.getBlock()->getTerminator());
+    ASSERT_NE(ContBr, nullptr);
+    ASSERT_FALSE(ContBr->isConditional());
+    EXPECT_EQ(ContBr->getSuccessor(0), OrigSucc);
+
+    Builder.SetInsertPoint(OrigSucc);
+    Builder.CreateRetVoid();
+
+    EXPECT_FALSE(verifyFunction(*F2, &errs()));
+  }
+
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+}
+
 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);



More information about the llvm-branch-commits mailing list