[llvm-branch-commits] [flang] [llvm] [mlir] [WIP][mlir][llvmir][OpenMP] Translate affinity clause in task construct to llvmir (PR #182223)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Feb 25 12:51:23 PST 2026


https://github.com/chichunchen updated https://github.com/llvm/llvm-project/pull/182223

>From f30db773091041afdb375168de091df255ea5ba3 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 13:25:17 -0600
Subject: [PATCH 01/11] Emit omp.iterator in affinity clause from Flang

---
 flang/lib/Lower/OpenMP/Utils.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 01aeb0ef58bfd..5834d7019ecb7 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -217,6 +217,15 @@ mlir::Value genIteratorCoordinate(Fortran::lower::AbstractConverter &converter,
                                   llvm::ArrayRef<mlir::Value> ivs,
                                   mlir::Location loc);
 
+bool hasIVReference(
+    const omp::Object &object,
+    const llvm::SmallPtrSetImpl<const Fortran::semantics::Symbol *> &ivSyms);
+
+mlir::Value genIteratorCoordinate(Fortran::lower::AbstractConverter &converter,
+                                  mlir::Value base,
+                                  llvm::ArrayRef<mlir::Value> ivs,
+                                  mlir::Location loc);
+
 } // namespace omp
 } // namespace lower
 } // namespace Fortran

>From f5645f2565a6a0876e251de522186408feca92e6 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 14:15:02 -0600
Subject: [PATCH 02/11] Add iterator test and remove redundant check lines

---
 mlir/test/Dialect/OpenMP/ops.mlir | 53 +++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index b908874c2010b..febf16788ee4a 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3654,3 +3654,56 @@ func.func @omp_task_affinity_iterator_2d(%lb0 : index, %ub0 : index, %st0 : inde
 
   return
 }
+
+// CHECK-LABEL: func.func @omp_task_affinity_iterator_1d(
+func.func @omp_task_affinity_iterator_1d(%lb : index, %ub : index, %step : index,
+                                       %addr : !llvm.ptr, %len : i64) -> () {
+  // CHECK: %[[IT:.*]] = omp.iterators(%[[IV:.*]]: index) = (%[[LB:.*]] to %[[UB:.*]] step %[[ST:.*]]) {
+  // CHECK:   %[[E:.*]] = omp.affinity_entry %[[ADDR:.*]], %[[LEN:.*]] : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+  // CHECK:   omp.yield(%[[E]] : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+  // CHECK: } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+  // CHECK: omp.task affinity(%[[IT]] : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+  // CHECK: }
+  %it = omp.iterators(%iv: index) = (%lb to %ub step %step) {
+    %e = omp.affinity_entry %addr, %len
+      : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+    omp.yield(%e : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+  } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+  omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+    omp.terminator
+  }
+
+  return
+}
+
+func.func @omp_task_affinity_iterator_2d(%lb0 : index, %ub0 : index, %st0 : index,
+                                          %lb1 : index, %ub1 : index, %st1 : index,
+                                          %addr0 : !llvm.ptr, %addr1 : !llvm.ptr,
+                                          %len0 : i64, %len1 : i64) -> () {
+  // CHECK: %[[IT:.*]] = omp.iterators(%[[I:.*]]: index, %[[J:.*]]: index) = (%[[LB0:.*]] to %[[UB0:.*]] step %[[ST0:.*]], %[[LB1:.*]] to %[[UB1:.*]] step %[[ST1:.*]]) {
+  // CHECK:   %[[E0:.*]] = omp.affinity_entry %[[A0:.*]], %[[L0:.*]] : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+  // CHECK:   %[[E1:.*]] = omp.affinity_entry %[[A1:.*]], %[[L1:.*]] : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+  // CHECK:   omp.yield(%[[E1]] : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+  // CHECK: } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+  // CHECK: omp.task affinity(%[[IT]] : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+  // CHECK: }
+  %it = omp.iterators(%i: index, %j: index) = (%lb0 to %ub0 step %st0, %lb1 to %ub1 step %st1) {
+    %use_i = arith.addi %i, %lb0 : index
+    %use_j = arith.addi %j, %lb1 : index
+    %_ = arith.cmpi ult, %use_i, %use_j : index
+
+    %e0 = omp.affinity_entry %addr0, %len0
+      : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+    %e1 = omp.affinity_entry %addr1, %len1
+      : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+
+    omp.yield(%e1 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+  } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+  omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+    omp.terminator
+  }
+
+  return
+}

>From 3d1f96555b571e34f02b06d8141f7cd0b966af77 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 15:20:40 -0600
Subject: [PATCH 03/11] [mlir][llvmir][OpenMP] Translate affinity clause in
 task construct to llvmir

Translate affinity entries to LLVM IR by passing the affinity information to
createTask (the __kmpc_omp_reg_task_with_affinity call is emitted in PostOutlineCB).
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 18 ++--
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 22 ++++-
 .../Frontend/OpenMPIRBuilderTest.cpp          | 92 +++++++++++++++++++
 .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp  |  2 +
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 72 ++++++++++++---
 5 files changed, 184 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 9885ffc8b2065..deb2535ac843f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1510,6 +1510,12 @@ class OpenMPIRBuilder {
         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
   };
 
+  /// A struct to pack the relevant information for an OpenMP affinity clause.
+  struct AffinityData {
+    Value *Count; // number of kmp_task_affinity_info_t entries
+    Value *Info;  // pointer to the kmp_task_affinity_info_t array
+  };
+
   /// Generator for `#omp taskloop`
   ///
   /// \param Loc The location where the taskloop construct was encountered.
@@ -1573,12 +1579,12 @@ class OpenMPIRBuilder {
   /// \param Mergeable	 If the given task is `mergeable`
   /// \param priority `priority-value' specifies the execution order of the
   ///                 tasks that is generated by the construct
-  LLVM_ABI InsertPointOrErrorTy
-  createTask(const LocationDescription &Loc, InsertPointTy AllocaIP,
-             BodyGenCallbackTy BodyGenCB, bool Tied = true,
-             Value *Final = nullptr, Value *IfCondition = nullptr,
-             SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
-             Value *EventHandle = nullptr, Value *Priority = nullptr);
+  LLVM_ABI InsertPointOrErrorTy createTask(
+      const LocationDescription &Loc, InsertPointTy AllocaIP,
+      BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr,
+      Value *IfCondition = nullptr, SmallVector<DependData> Dependencies = {},
+      AffinityData Affinity = {}, bool Mergeable = false,
+      Value *EventHandle = nullptr, Value *Priority = nullptr);
 
   /// Generator for the taskgroup construct
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 6775674d733fe..8cd31fd3e207c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2434,8 +2434,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
-    SmallVector<DependData> Dependencies, bool Mergeable, Value *EventHandle,
-    Value *Priority) {
+    SmallVector<DependData> Dependencies, AffinityData Affinity, bool Mergeable,
+    Value *EventHandle, Value *Priority) {
 
   if (!updateToLocation(Loc))
     return InsertPointTy();
@@ -2481,7 +2481,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
       Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
 
   OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
-                      Mergeable, Priority, EventHandle, TaskAllocaBB,
+                      Affinity, Mergeable, Priority, EventHandle, TaskAllocaBB,
                       ToBeDeleted](Function &OutlinedFn) mutable {
     // Replace the Stale CI by appropriate RTL function call.
     assert(OutlinedFn.hasOneUse() &&
@@ -2555,6 +2555,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
                       /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
                       /*task_func=*/&OutlinedFn});
 
+    if (Affinity.Count && Affinity.Info) {
+      Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
+          OMPRTL___kmpc_omp_reg_task_with_affinity);
+
+      // Cast the affinity list pointer to address space 0 if needed.
+      Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          Affinity.Info, Builder.getPtrTy(0));
+
+      if (!Affinity.Count->getType()->isIntegerTy(32))
+        Affinity.Count =
+            Builder.CreateTruncOrBitCast(Affinity.Count, Builder.getInt32Ty());
+
+      createRuntimeFunctionCall(
+          RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+    }
+
     // Emit detach clause initialization.
     // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
     // task_descriptor);
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 200dda84b13f5..11f03d2a06d6e 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -7555,6 +7555,98 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
   EXPECT_EQ(OulinedFnCall->getNextNode(), TaskCompleteCall);
 }
 
+TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
+
+  LLVMContext &Ctx = M->getContext();
+  StructType *AffInfoTy = StructType::get(
+      Type::getInt64Ty(Ctx), Type::getInt64Ty(Ctx), Type::getInt32Ty(Ctx));
+
+  // Allocate a one-element array of AffInfoTy to serve as the affinity list.
+  Value *CountI32 = Builder.getInt32(1);
+  AllocaInst *AffArr =
+      Builder.CreateAlloca(AffInfoTy, Builder.getInt64(1), "omp.affinity_list");
+
+  // Fill entry 0 minimally so the pointer definitely dominates use.
+  Value *Entry0 = Builder.CreateInBoundsGEP(
+      AffInfoTy, AffArr, Builder.getInt64(0), "omp.affinity.entry");
+  Builder.CreateStore(Builder.getInt64(0),
+                      Builder.CreateStructGEP(AffInfoTy, Entry0, 0));
+  Builder.CreateStore(Builder.getInt64(64),
+                      Builder.CreateStructGEP(AffInfoTy, Entry0, 1));
+  Builder.CreateStore(Builder.getInt32(0),
+                      Builder.CreateStructGEP(AffInfoTy, Entry0, 2));
+
+  OpenMPIRBuilder::AffinityData Affinity;
+  Affinity.Count = CountI32;
+  Affinity.Info = AffArr;
+
+  BasicBlock *AllocaBB = Builder.GetInsertBlock();
+  BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
+  OpenMPIRBuilder::LocationDescription Loc(
+      InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
+
+  ASSERT_EXPECTED_INIT(
+      OpenMPIRBuilder::InsertPointTy, AfterIP,
+      OMPBuilder.createTask(
+          Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
+          BodyGenCB,
+          /*Tied=*/true,
+          /*Final=*/nullptr,
+          /*IfCondition=*/nullptr,
+          /*Dependencies=*/{},
+          /*Affinity=*/Affinity,
+          /*Mergeable=*/false,
+          /*EventHandle=*/nullptr,
+          /*Priority=*/nullptr));
+
+  Builder.restoreIP(AfterIP);
+  OMPBuilder.finalize();
+  Builder.CreateRetVoid();
+
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  Function *TaskAllocFn =
+      OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+  Function *RegAffFn = OMPBuilder.getOrCreateRuntimeFunctionPtr(
+      OMPRTL___kmpc_omp_reg_task_with_affinity);
+
+  CallInst *TaskAllocCI = nullptr;
+  CallInst *RegAffCI = nullptr;
+
+  for (auto &I : instructions(F)) {
+    if (auto *CI = dyn_cast<CallInst>(&I)) {
+      if (CI->getCalledFunction() == TaskAllocFn)
+        TaskAllocCI = CI;
+      if (CI->getCalledFunction() == RegAffFn)
+        RegAffCI = CI;
+    }
+  }
+
+  ASSERT_NE(TaskAllocCI, nullptr) << "expected __kmpc_omp_task_alloc call";
+  ASSERT_NE(RegAffCI, nullptr)
+      << "expected __kmpc_omp_reg_task_with_affinity call";
+
+  // Check reg_task_with_affinity signature:
+  //   i32 __kmpc_omp_reg_task_with_affinity(ident_t*, i32 gtid,
+  //                                         kmp_task_t*, i32 naffins,
+  //                                         kmp_task_affinity_info_t*)
+  ASSERT_EQ(RegAffCI->arg_size(), 5u);
+  // naffins
+  EXPECT_TRUE(RegAffCI->getArgOperand(3)->getType()->isIntegerTy(32));
+  // kmp_task_affinity_info_t*
+  EXPECT_TRUE(RegAffCI->getArgOperand(4)->getType()->isPointerTy());
+}
+
 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 7fdc23adc8573..e6242e5f06418 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -154,6 +154,8 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
   // discarded on lowering to LLVM-IR from the OpenMP dialect.
   converter.addConversion(
       [&](omp::MapBoundsType type) -> Type { return type; });
+  converter.addConversion(
+      [&](omp::AffinityEntryType type) -> Type { return type; });
 
   // Add conversions for all OpenMP operations.
   addOpenMPOpConversions<
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 38c5802ed60ed..4949eae218c6e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -321,10 +321,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
                           << " operation";
   };
 
-  auto checkAffinity = [&todo](auto op, LogicalResult &result) {
-    if (!op.getAffinityVars().empty())
-      result = todo("affinity");
-  };
   auto checkAllocate = [&todo](auto op, LogicalResult &result) {
     if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
       result = todo("allocate");
@@ -413,7 +409,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkThreadLimit(op, result);
       })
       .Case([&](omp::TaskOp op) {
-        checkAffinity(op, result);
         checkAllocate(op, result);
         checkInReduction(op, result);
       })
@@ -2308,6 +2303,53 @@ void TaskContextStructManager::freeStructPtr() {
   builder.CreateFree(structPtr);
 }
 
+static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
+                                  llvm::IRBuilderBase &builder,
+                                  LLVM::ModuleTranslation &moduleTranslation,
+                                  llvm::OpenMPIRBuilder::AffinityData &ad) {
+  auto &ctx = builder.getContext();
+  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+      llvm::Type::getInt32Ty(ctx));
+
+  SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
+                                        taskOp.getAffinityVars().end());
+
+  // Allocate [N x kmp_task_affinity_info_t]
+  llvm::Value *count =
+      llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
+  llvm::AllocaInst *affinityList =
+      builder.CreateAlloca(kmpTaskAffinityInfoTy, count, "omp.affinity_list");
+
+  for (unsigned i = 0; i < affinityVars.size(); ++i) {
+    auto entryOp = affinityVars[i].getDefiningOp<mlir::omp::AffinityEntryOp>();
+    assert(entryOp && "affinity item must be omp.affinity_entry");
+
+    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+    assert(addr && "expect affinity addr to be non-null");
+    llvm::Value *baseAddr = builder.CreatePtrToInt(addr, builder.getInt64Ty());
+    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+    llvm::Value *flags = builder.getInt32(0);
+
+    llvm::Value *entry =
+        builder.CreateInBoundsGEP(kmpTaskAffinityInfoTy, affinityList,
+                                  builder.getInt64(i), "omp.affinity.entry");
+
+    llvm::Value *gep0 =
+        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0); // base_addr
+    llvm::Value *gep1 =
+        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1); // len
+    llvm::Value *gep2 =
+        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2); // flags (i32)
+
+    builder.CreateStore(baseAddr, gep0);
+    builder.CreateStore(len, gep1);
+    builder.CreateStore(flags, gep2);
+  }
+  ad.Info = affinityList;
+  ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
+}
+
 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2520,12 +2562,16 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
                   moduleTranslation, dds);
 
+  llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
+  if (!taskOp.getAffinityVars().empty())
+    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createTask(
           ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
           moduleTranslation.lookupValue(taskOp.getFinal()),
-          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
+          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad,
           taskOp.getMergeable(),
           moduleTranslation.lookupValue(taskOp.getEventHandle()),
           moduleTranslation.lookupValue(taskOp.getPriority()));
@@ -7289,13 +7335,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
           .Case([&](omp::LoopNestOp) {
             return convertOmpLoopNest(*op, builder, moduleTranslation);
           })
-          .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
-              [&](auto op) {
-                // No-op, should be handled by relevant owning operations e.g.
-                // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
-                // etc. and then discarded
-                return success();
-              })
+          .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
+                omp::AffinityEntryOp>([&](auto op) {
+            // No-op, should be handled by relevant owning operations e.g.
+            // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp,
+            // TaskOp etc. and then discarded
+            return success();
+          })
           .Case([&](omp::NewCliOp op) {
             // Meta-operation: Doesn't do anything by itself, but used to
             // identify a loop.

>From 8d272556ad61ffef78760fa8d17f8190e533c76d Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 18:25:34 -0600
Subject: [PATCH 04/11] Implement lowering for omp.iterator in affinity

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  27 +++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  82 ++++++++
 .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp  |   1 +
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 180 +++++++++++++++---
 4 files changed, 268 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index deb2535ac843f..0332930eafedf 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4293,6 +4293,33 @@ class ScanInfo {
   ~ScanInfo() { delete (ScanBuffPtrs); }
 };
 
+class IteratorLoopNestScope {
+private:
+  llvm::IRBuilderBase &B;
+  llvm::LLVMContext &Ctx;
+
+  unsigned Dims = 0;
+  llvm::ArrayRef<llvm::Value *> LowerBounds;
+  llvm::ArrayRef<llvm::Value *> UpperBounds;
+  llvm::ArrayRef<llvm::Value *> Steps;
+
+  llvm::SmallVector<llvm::PHINode *> IVs;
+  llvm::SmallVector<llvm::BasicBlock *> HdrBBs;
+  llvm::SmallVector<llvm::BasicBlock *> BodyBBs;
+  llvm::SmallVector<llvm::BasicBlock *> LatchBBs;
+  llvm::SmallVector<llvm::BasicBlock *> ExitBBs;
+
+public:
+  IteratorLoopNestScope(llvm::IRBuilderBase &Builder, unsigned Dims,
+                        llvm::ArrayRef<llvm::Value *> LowerBounds,
+                        llvm::ArrayRef<llvm::Value *> UpperBounds,
+                        llvm::ArrayRef<llvm::Value *> Steps);
+
+  ~IteratorLoopNestScope();
+
+  llvm::ArrayRef<llvm::PHINode *> getIVs() const { return IVs; }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8cd31fd3e207c..e183eb7dca89f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11862,3 +11862,85 @@ void CanonicalLoopInfo::invalidate() {
   Latch = nullptr;
   Exit = nullptr;
 }
+
+IteratorLoopNestScope::IteratorLoopNestScope(
+    llvm::IRBuilderBase &Builder, unsigned Dims,
+    llvm::ArrayRef<llvm::Value *> LowerBounds,
+    llvm::ArrayRef<llvm::Value *> UpperBounds,
+    llvm::ArrayRef<llvm::Value *> Steps)
+    : B(Builder), Ctx(Builder.getContext()), Dims(Dims),
+      LowerBounds(LowerBounds), UpperBounds(UpperBounds), Steps(Steps) {
+  Function *F = B.GetInsertBlock()->getParent();
+  IVs.assign(Dims, nullptr);
+  HdrBBs.resize(Dims);
+  BodyBBs.resize(Dims);
+  LatchBBs.resize(Dims);
+  ExitBBs.resize(Dims);
+
+  llvm::BasicBlock *PreBB = B.GetInsertBlock();
+
+  for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
+    HdrBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.hdr", F);
+    BodyBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.body", F);
+    LatchBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.latch", F);
+    ExitBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.exit", F);
+  }
+
+  // Enter outermost loop.
+  B.CreateBr(HdrBBs[0]);
+
+  for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
+    // header
+    B.SetInsertPoint(HdrBBs[Dimension]);
+    IVs[Dimension] = B.CreatePHI(B.getInt64Ty(), 2, "omp.it.iv");
+    IVs[Dimension]->addIncoming(LowerBounds[Dimension],
+                                (Dimension == 0) ? PreBB
+                                                 : BodyBBs[Dimension - 1]);
+
+    llvm::Value *Cond =
+        B.CreateICmpULE(IVs[Dimension], UpperBounds[Dimension], "omp.it.cmp");
+    B.CreateCondBr(Cond, BodyBBs[Dimension], ExitBBs[Dimension]);
+
+    // body
+    B.SetInsertPoint(BodyBBs[Dimension]);
+    if (Dimension + 1 < Dims) {
+      B.CreateBr(HdrBBs[Dimension + 1]);
+    }
+
+    // latch: iv = iv + step
+    B.SetInsertPoint(LatchBBs[Dimension]);
+    llvm::Value *Next =
+        B.CreateAdd(IVs[Dimension], Steps[Dimension], "omp.it.next");
+    IVs[Dimension]->addIncoming(Next, LatchBBs[Dimension]);
+    B.CreateBr(HdrBBs[Dimension]);
+  }
+
+  // Continue emitting the body of the innermost loop.
+  B.SetInsertPoint(BodyBBs[Dims - 1]);
+}
+
+IteratorLoopNestScope::~IteratorLoopNestScope() {
+  if (Dims == 0)
+    return;
+
+  const unsigned Last = Dims - 1;
+
+  // Fallthrough to latch if the innermost body isn't terminated by the user.
+  if (!BodyBBs[Last]->getTerminator()) {
+    llvm::IRBuilderBase::InsertPointGuard g(B);
+    B.SetInsertPoint(BodyBBs[Last]);
+    B.CreateBr(LatchBBs[Last]);
+  }
+
+  // Wire exits in reverse: exit(d) -> latch(d-1).
+  for (unsigned Dimension = Last; Dimension > 0; --Dimension) {
+    if (ExitBBs[Dimension]->getTerminator())
+      continue;
+    llvm::IRBuilderBase::InsertPointGuard g(B);
+    B.SetInsertPoint(ExitBBs[Dimension]);
+    B.CreateBr(LatchBBs[Dimension - 1]);
+  }
+
+  // Continue after the whole nest at outermost exit.
+  B.SetInsertPoint(ExitBBs[0]);
+}
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index e6242e5f06418..d90912f9f686f 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -156,6 +156,7 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
       [&](omp::MapBoundsType type) -> Type { return type; });
   converter.addConversion(
       [&](omp::AffinityEntryType type) -> Type { return type; });
+  converter.addConversion([&](omp::IteratedType type) -> Type { return type; });
 
   // Add conversions for all OpenMP operations.
   addOpenMPOpConversions<
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4949eae218c6e..0f8d2f4becc45 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2229,6 +2229,71 @@ class TaskContextStructManager {
   /// The type of the structure
   llvm::Type *structTy = nullptr;
 };
+
+class IteratorInfo {
+private:
+  llvm::SmallVector<llvm::Value *> lowerBounds;
+  llvm::SmallVector<llvm::Value *> upperBounds;
+  llvm::SmallVector<llvm::Value *> steps;
+  llvm::SmallVector<llvm::Value *> trips;
+  unsigned dims;
+  llvm::Value *totalTrips;
+  const mlir::LLVM::ModuleTranslation &moduleTranslation;
+  llvm::IRBuilderBase &builder;
+
+  llvm::Value *lookUpAsI64(mlir::Value val) {
+    llvm::Value *v = moduleTranslation.lookupValue(val);
+    if (!v)
+      return nullptr;
+    if (v->getType()->isIntegerTy(64))
+      return v;
+    if (v->getType()->isIntegerTy())
+      return builder.CreateZExtOrTrunc(v, builder.getInt64Ty());
+    return nullptr;
+  }
+
+public:
+  IteratorInfo(mlir::omp::IteratorsOp itersOp,
+               mlir::LLVM::ModuleTranslation &moduleTranslation,
+               llvm::IRBuilderBase &builder)
+      : moduleTranslation(moduleTranslation), builder(builder) {
+    dims = itersOp.getLbs().size();
+    this->lowerBounds.resize(dims);
+    this->upperBounds.resize(dims);
+    this->steps.resize(dims);
+    this->trips.resize(dims);
+
+    for (unsigned d = 0; d < dims; ++d) {
+      llvm::Value *lb = lookUpAsI64(itersOp.getLbs()[d]);
+      llvm::Value *ub = lookUpAsI64(itersOp.getUbs()[d]);
+      llvm::Value *st = lookUpAsI64(itersOp.getSteps()[d]);
+      assert(lb && ub && st &&
+             "Expect lowerBounds, upperBounds, and steps in IteratorsOp");
+
+      this->lowerBounds[d] = lb;
+      this->upperBounds[d] = ub;
+      this->steps[d] = st;
+
+      // trips = ((ub - lb) / step) + 1  (inclusive ub, assume positive step)
+      llvm::Value *diff = builder.CreateSub(ub, lb);
+      llvm::Value *div = builder.CreateUDiv(diff, st);
+      this->trips[d] = builder.CreateAdd(
+          div, llvm::ConstantInt::get(builder.getInt64Ty(), 1));
+    }
+
+    this->totalTrips = llvm::ConstantInt::get(builder.getInt64Ty(), 1);
+    for (unsigned d = 0; d < dims; ++d)
+      this->totalTrips = builder.CreateMul(this->totalTrips, this->trips[d]);
+  }
+
+  unsigned getDims() const { return dims; }
+  llvm::ArrayRef<llvm::Value *> getLowerBounds() const { return lowerBounds; }
+  llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
+  llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
+  llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
+  llvm::Value *getTotalTrips() { return totalTrips; }
+};
+
 } // namespace
 
 void TaskContextStructManager::generateTaskContextStruct() {
@@ -2303,19 +2368,42 @@ void TaskContextStructManager::freeStructPtr() {
   builder.CreateFree(structPtr);
 }
 
+static void storeAffinityEntry(llvm::IRBuilderBase &builder,
+                               llvm::Value *affinityList, llvm::Value *index,
+                               llvm::Value *addr, llvm::Value *len) {
+  auto &ctx = builder.getContext();
+  // { base_addr (i64), len (i64), flags (i32) }
+  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+      llvm::Type::getInt32Ty(ctx));
+
+  llvm::Value *entry = builder.CreateInBoundsGEP(
+      kmpTaskAffinityInfoTy, affinityList, index, "omp.affinity.entry");
+
+  llvm::Value *baseAddrI64 = builder.CreatePtrToInt(addr, builder.getInt64Ty());
+  llvm::Value *flags = builder.getInt32(0);
+
+  builder.CreateStore(baseAddrI64,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0));
+  builder.CreateStore(len,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1));
+  builder.CreateStore(flags,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2));
+}
+
 static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
                                   llvm::IRBuilderBase &builder,
                                   LLVM::ModuleTranslation &moduleTranslation,
                                   llvm::OpenMPIRBuilder::AffinityData &ad) {
   auto &ctx = builder.getContext();
+  SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
+                                        taskOp.getAffinityVars().end());
+
+  // Define the type locally just for the Alloca
   llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
       llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
       llvm::Type::getInt32Ty(ctx));
 
-  SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
-                                        taskOp.getAffinityVars().end());
-
-  // Allocate [N x kmp_task_affinity_info_t]
   llvm::Value *count =
       llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
   llvm::AllocaInst *affinityList =
@@ -2327,29 +2415,70 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
 
     llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
     assert(addr && "expect affinity addr to be non-null");
-    llvm::Value *baseAddr = builder.CreatePtrToInt(addr, builder.getInt64Ty());
     llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
-    llvm::Value *flags = builder.getInt32(0);
-
-    llvm::Value *entry =
-        builder.CreateInBoundsGEP(kmpTaskAffinityInfoTy, affinityList,
-                                  builder.getInt64(i), "omp.affinity.entry");
-
-    llvm::Value *gep0 =
-        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0); // base_addr
-    llvm::Value *gep1 =
-        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1); // len
-    llvm::Value *gep2 =
-        builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2); // flags (i32)
-
-    builder.CreateStore(baseAddr, gep0);
-    builder.CreateStore(len, gep1);
-    builder.CreateStore(flags, gep2);
+    storeAffinityEntry(builder, affinityList, builder.getInt64(i), addr, len);
   }
+
   ad.Info = affinityList;
   ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
 }
 
+static mlir::LogicalResult
+buildAffinityIterator(mlir::omp::IteratorsOp itersOp,
+                      llvm::IRBuilderBase &builder,
+                      mlir::LLVM::ModuleTranslation &moduleTranslation,
+                      llvm::OpenMPIRBuilder::AffinityData &A) {
+  auto &ctx = builder.getContext();
+  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+      llvm::Type::getInt32Ty(ctx));
+
+  mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
+  IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+
+  auto *list = builder.CreateAlloca(
+      kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
+
+  llvm::IteratorLoopNestScope iterLoops(
+      builder, iterInfo.getDims(), iterInfo.getLowerBounds(),
+      iterInfo.getUpperBounds(), iterInfo.getSteps());
+  auto indVars = iterLoops.getIVs();
+  for (unsigned d = 0; d < iterInfo.getDims(); ++d)
+    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), indVars[d]);
+
+  moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
+  if (mlir::failed(moduleTranslation.convertBlock(
+          iteratorRegionBlock, /*ignoreArguments=*/true, builder)))
+    return itersOp.emitOpError() << "failed to translate iterators region";
+
+  auto yield =
+      mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
+  auto entryOp =
+      yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
+
+  llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+  llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+
+  llvm::Value *linearIdx = llvm::ConstantInt::get(builder.getInt64Ty(), 0);
+  for (unsigned d = 0; d < iterInfo.getDims(); ++d) {
+    // Normalize the physical IV to a 0-based logical index for this dimension.
+    llvm::Value *logicalIdx = builder.CreateUDiv(
+        builder.CreateSub(indVars[d], iterInfo.getLowerBounds()[d]),
+        iterInfo.getSteps()[d]);
+    // Row-major flattening: linear = linear * Trips[d] + logicalIdx
+    linearIdx = builder.CreateAdd(
+        builder.CreateMul(linearIdx, iterInfo.getTrips()[d]), logicalIdx);
+  }
+
+  storeAffinityEntry(builder, list, linearIdx, addr, len);
+
+  moduleTranslation.forgetMapping(itersOp.getRegion());
+
+  A.Info = list;
+  A.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+  return mlir::success();
+}
+
 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2565,6 +2694,13 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
   if (!taskOp.getAffinityVars().empty())
     buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+  else if (!taskOp.getIterated().empty()) {
+    for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
+      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+      if (failed(buildAffinityIterator(iterOp, builder, moduleTranslation, ad)))
+        return failure();
+    }
+  }
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
@@ -7336,7 +7472,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
             return convertOmpLoopNest(*op, builder, moduleTranslation);
           })
           .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
-                omp::AffinityEntryOp>([&](auto op) {
+                omp::AffinityEntryOp, omp::IteratorsOp>([&](auto op) {
             // No-op, should be handled by relevant owning operations e.g.
             // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
             // etc. and then discarded

>From 95e235a29b89133903bf890c4815524927afab8a Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 15:20:55 -0600
Subject: [PATCH 05/11] Create 1-dim canonical loop for omp.iterators

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  27 ----
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  82 ----------
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 142 +++++++++++-------
 3 files changed, 91 insertions(+), 160 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0332930eafedf..deb2535ac843f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4293,33 +4293,6 @@ class ScanInfo {
   ~ScanInfo() { delete (ScanBuffPtrs); }
 };
 
-class IteratorLoopNestScope {
-private:
-  llvm::IRBuilderBase &B;
-  llvm::LLVMContext &Ctx;
-
-  unsigned Dims = 0;
-  llvm::ArrayRef<llvm::Value *> LowerBounds;
-  llvm::ArrayRef<llvm::Value *> UpperBounds;
-  llvm::ArrayRef<llvm::Value *> Steps;
-
-  llvm::SmallVector<llvm::PHINode *> IVs;
-  llvm::SmallVector<llvm::BasicBlock *> HdrBBs;
-  llvm::SmallVector<llvm::BasicBlock *> BodyBBs;
-  llvm::SmallVector<llvm::BasicBlock *> LatchBBs;
-  llvm::SmallVector<llvm::BasicBlock *> ExitBBs;
-
-public:
-  IteratorLoopNestScope(llvm::IRBuilderBase &Builder, unsigned Dims,
-                        llvm::ArrayRef<llvm::Value *> LowerBounds,
-                        llvm::ArrayRef<llvm::Value *> UpperBounds,
-                        llvm::ArrayRef<llvm::Value *> Steps);
-
-  ~IteratorLoopNestScope();
-
-  llvm::ArrayRef<llvm::PHINode *> getIVs() const { return IVs; }
-};
-
 } // end namespace llvm
 
 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index e183eb7dca89f..8cd31fd3e207c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11862,85 +11862,3 @@ void CanonicalLoopInfo::invalidate() {
   Latch = nullptr;
   Exit = nullptr;
 }
-
-IteratorLoopNestScope::IteratorLoopNestScope(
-    llvm::IRBuilderBase &Builder, unsigned Dims,
-    llvm::ArrayRef<llvm::Value *> LowerBounds,
-    llvm::ArrayRef<llvm::Value *> UpperBounds,
-    llvm::ArrayRef<llvm::Value *> Steps)
-    : B(Builder), Ctx(Builder.getContext()), Dims(Dims),
-      LowerBounds(LowerBounds), UpperBounds(UpperBounds), Steps(Steps) {
-  Function *F = B.GetInsertBlock()->getParent();
-  IVs.assign(Dims, nullptr);
-  HdrBBs.resize(Dims);
-  BodyBBs.resize(Dims);
-  LatchBBs.resize(Dims);
-  ExitBBs.resize(Dims);
-
-  llvm::BasicBlock *PreBB = B.GetInsertBlock();
-
-  for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
-    HdrBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.hdr", F);
-    BodyBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.body", F);
-    LatchBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.latch", F);
-    ExitBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.exit", F);
-  }
-
-  // Enter outermost loop.
-  B.CreateBr(HdrBBs[0]);
-
-  for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
-    // header
-    B.SetInsertPoint(HdrBBs[Dimension]);
-    IVs[Dimension] = B.CreatePHI(B.getInt64Ty(), 2, "omp.it.iv");
-    IVs[Dimension]->addIncoming(LowerBounds[Dimension],
-                                (Dimension == 0) ? PreBB
-                                                 : BodyBBs[Dimension - 1]);
-
-    llvm::Value *Cond =
-        B.CreateICmpULE(IVs[Dimension], UpperBounds[Dimension], "omp.it.cmp");
-    B.CreateCondBr(Cond, BodyBBs[Dimension], ExitBBs[Dimension]);
-
-    // body
-    B.SetInsertPoint(BodyBBs[Dimension]);
-    if (Dimension + 1 < Dims) {
-      B.CreateBr(HdrBBs[Dimension + 1]);
-    }
-
-    // iv = iv + step
-    B.SetInsertPoint(LatchBBs[Dimension]);
-    llvm::Value *Next =
-        B.CreateAdd(IVs[Dimension], Steps[Dimension], "omp.it.next");
-    IVs[Dimension]->addIncoming(Next, LatchBBs[Dimension]);
-    B.CreateBr(HdrBBs[Dimension]);
-  }
-
-  // Continue emitting the body of the innermost loop.
-  B.SetInsertPoint(BodyBBs[Dims - 1]);
-}
-
-IteratorLoopNestScope::~IteratorLoopNestScope() {
-  if (Dims == 0)
-    return;
-
-  const unsigned Last = Dims - 1;
-
-  // Fallthrough to latch if the innermost body isn't terminated by the user.
-  if (!BodyBBs[Last]->getTerminator()) {
-    llvm::IRBuilderBase::InsertPointGuard g(B);
-    B.SetInsertPoint(BodyBBs[Last]);
-    B.CreateBr(LatchBBs[Last]);
-  }
-
-  // Wire exits in reverse: exit(d) -> latch(d-1).
-  for (unsigned Dimension = Last; Dimension > 0; --Dimension) {
-    if (ExitBBs[Dimension]->getTerminator())
-      continue;
-    llvm::IRBuilderBase::InsertPointGuard g(B);
-    B.SetInsertPoint(ExitBBs[Dimension]);
-    B.CreateBr(LatchBBs[Dimension - 1]);
-  }
-
-  // Continue after the whole nest at outermost exit.
-  B.SetInsertPoint(ExitBBs[0]);
-}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0f8d2f4becc45..97d96660405d0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2238,11 +2238,10 @@ class IteratorInfo {
   llvm::SmallVector<llvm::Value *> trips;
   unsigned dims;
   llvm::Value *totalTrips;
-  const mlir::LLVM::ModuleTranslation &moduleTranslation;
-  llvm::IRBuilderBase &builder;
 
-  llvm::Value *lookUpAsI64(mlir::Value val) {
-    llvm::Value *v = moduleTranslation.lookupValue(val);
+  llvm::Value *lookUpAsI64(mlir::Value val, const LLVM::ModuleTranslation &mt,
+                           llvm::IRBuilderBase &builder) {
+    llvm::Value *v = mt.lookupValue(val);
     if (!v)
       return nullptr;
     if (v->getType()->isIntegerTy(64))
@@ -2255,8 +2254,7 @@ class IteratorInfo {
 public:
   IteratorInfo(mlir::omp::IteratorsOp itersOp,
                mlir::LLVM::ModuleTranslation &moduleTranslation,
-               llvm::IRBuilderBase &builder)
-      : moduleTranslation(moduleTranslation), builder(builder) {
+               llvm::IRBuilderBase &builder) {
     dims = itersOp.getLbs().size();
     this->lowerBounds.resize(dims);
     this->upperBounds.resize(dims);
@@ -2264,9 +2262,12 @@ class IteratorInfo {
     this->trips.resize(dims);
 
     for (unsigned d = 0; d < dims; ++d) {
-      llvm::Value *lb = lookUpAsI64(itersOp.getLbs()[d]);
-      llvm::Value *ub = lookUpAsI64(itersOp.getUbs()[d]);
-      llvm::Value *st = lookUpAsI64(itersOp.getSteps()[d]);
+      llvm::Value *lb =
+          lookUpAsI64(itersOp.getLbs()[d], moduleTranslation, builder);
+      llvm::Value *ub =
+          lookUpAsI64(itersOp.getUbs()[d], moduleTranslation, builder);
+      llvm::Value *st =
+          lookUpAsI64(itersOp.getSteps()[d], moduleTranslation, builder);
       assert(lb && ub && st &&
              "Expect lowerBounds, upperBounds, and steps in IteratorsOp");
 
@@ -2291,7 +2292,7 @@ class IteratorInfo {
   llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
   llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
   llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
-  llvm::Value *getTotalTrips() { return totalTrips; }
+  llvm::Value *getTotalTrips() const { return totalTrips; }
 };
 
 } // namespace
@@ -2424,58 +2425,96 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
 }
 
 static mlir::LogicalResult
-buildAffinityIterator(mlir::omp::IteratorsOp itersOp,
-                      llvm::IRBuilderBase &builder,
-                      mlir::LLVM::ModuleTranslation &moduleTranslation,
-                      llvm::OpenMPIRBuilder::AffinityData &A) {
+buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
+                              llvm::IRBuilderBase &builder,
+                              mlir::LLVM::ModuleTranslation &moduleTranslation,
+                              llvm::OpenMPIRBuilder::AffinityData &ad) {
+
   auto &ctx = builder.getContext();
+  auto &ompBuilder = *moduleTranslation.getOpenMPBuilder();
+  IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+
   llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
       llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
       llvm::Type::getInt32Ty(ctx));
+  auto *list = builder.CreateAlloca(
+      kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
 
   mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
-  IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
 
-  auto *list = builder.CreateAlloca(
-      kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
+  llvm::Function *F = builder.GetInsertBlock()->getParent();
+  llvm::BasicBlock *curBB = builder.GetInsertBlock();
+  llvm::Instruction *splitPt = (builder.GetInsertPoint() == curBB->end())
+                                   ? curBB->getTerminator()
+                                   : &*builder.GetInsertPoint();
+  if (!splitPt) {
+    llvm::BasicBlock *tmp = llvm::BasicBlock::Create(ctx, "omp.tmp.cont", F);
+    builder.SetInsertPoint(curBB);
+    builder.CreateBr(tmp);
+    splitPt = curBB->getTerminator();
+  }
 
-  llvm::IteratorLoopNestScope iterLoops(
-      builder, iterInfo.getDims(), iterInfo.getLowerBounds(),
-      iterInfo.getUpperBounds(), iterInfo.getSteps());
-  auto indVars = iterLoops.getIVs();
-  for (unsigned d = 0; d < iterInfo.getDims(); ++d)
-    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), indVars[d]);
+  llvm::BasicBlock *contBB = curBB->splitBasicBlock(splitPt, "omp.task.cont");
+  // Remove the branch to contBB since we will branch to contBB after the loop
+  curBB->getTerminator()->eraseFromParent();
+
+  auto *cli = ompBuilder.createLoopSkeleton(
+      builder.getCurrentDebugLocation(), iterInfo.getTotalTrips(),
+      builder.GetInsertBlock()->getParent(), contBB, contBB);
+  builder.SetInsertPoint(curBB);
+  builder.CreateBr(cli->getPreheader());
+
+  // Remove the unconditional branch inserted by createLoopSkeleton in the body
+  if (llvm::Instruction *T = cli->getBody()->getTerminator())
+    T->eraseFromParent();
+
+  // Start building the loop body
+  builder.SetInsertPoint(cli->getBody());
+
+  llvm::Value *linearIV = cli->getIndVar();
+  for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
+    llvm::Value *trip = iterInfo.getTrips()[d];
+    // idx = linearIV % trips[d]
+    llvm::Value *idx = builder.CreateURem(linearIV, trip);
+    // linearIV = linearIV / trips[d]
+    linearIV = builder.CreateUDiv(linearIV, trip);
+
+    // physicalIV = lb + logical * step.
+    llvm::Value *physicalIV = builder.CreateAdd(
+        iterInfo.getLowerBounds()[d],
+        builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv");
+
+    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physicalIV);
+  }
 
   moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
-  if (mlir::failed(moduleTranslation.convertBlock(
-          iteratorRegionBlock, /*ignoreArguments=*/true, builder)))
+  if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
+                                                  /*ignoreArguments=*/true,
+                                                  builder))) {
     return itersOp.emitOpError() << "failed to translate iterators region";
+  }
 
   auto yield =
       mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
   auto entryOp =
       yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
-
   llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
   llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+  storeAffinityEntry(builder, list, cli->getIndVar(), addr, len);
 
-  llvm::Value *linearIdx = llvm::ConstantInt::get(builder.getInt64Ty(), 0);
-  for (unsigned d = 0; d < iterInfo.getDims(); ++d) {
-    // Normalize the physical IV to a 0-based logical index for this dimension.
-    llvm::Value *logicalIdx = builder.CreateUDiv(
-        builder.CreateSub(indVars[d], iterInfo.getLowerBounds()[d]),
-        iterInfo.getSteps()[d]);
-    // Row-major flattening: linear = linear * Trips[d] + logicalIdx
-    linearIdx = builder.CreateAdd(
-        builder.CreateMul(linearIdx, iterInfo.getTrips()[d]), logicalIdx);
-  }
-
-  storeAffinityEntry(builder, list, linearIdx, addr, len);
+  // Ensure we end the loop body by jumping to the latch
+  if (!builder.GetInsertBlock()->getTerminator())
+    builder.CreateBr(cli->getLatch());
 
   moduleTranslation.forgetMapping(itersOp.getRegion());
 
-  A.Info = list;
-  A.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+  builder.SetInsertPoint(cli->getAfter(), cli->getAfter()->begin());
+  builder.CreateBr(contBB);
+  builder.SetInsertPoint(contBB, contBB->begin());
+
+  ad.Info = list;
+  ad.Count =
+      builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
   return mlir::success();
 }
 
@@ -2593,6 +2632,18 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
           taskOp.getPrivateNeedsBarrier())))
     return llvm::failure();
 
+  llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
+  if (!taskOp.getAffinityVars().empty())
+    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+  else if (!taskOp.getIterated().empty()) {
+    for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
+      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+      if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
+                                               moduleTranslation, ad)))
+        return llvm::failure();
+    }
+  }
+
   // Set up for call to createTask()
   builder.SetInsertPoint(taskStartBlock);
 
@@ -2691,17 +2742,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
                   moduleTranslation, dds);
 
-  llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
-  if (!taskOp.getAffinityVars().empty())
-    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
-  else if (!taskOp.getIterated().empty()) {
-    for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
-      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
-      if (failed(buildAffinityIterator(iterOp, builder, moduleTranslation, ad)))
-        return failure();
-    }
-  }
-
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createTask(

>From 9690a796c46e7135765fe64365ad27f2d0b0f689 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 16:23:42 -0600
Subject: [PATCH 06/11] Support multiple affinity registrations for a task

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  2 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 28 ++++++-------
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 42 +++++++++++++------
 3 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index deb2535ac843f..1c421f0dfce66 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1583,7 +1583,7 @@ class OpenMPIRBuilder {
       const LocationDescription &Loc, InsertPointTy AllocaIP,
       BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr,
       Value *IfCondition = nullptr, SmallVector<DependData> Dependencies = {},
-      AffinityData Affinity = {}, bool Mergeable = false,
+      SmallVector<AffinityData> Affinities = {}, bool Mergeable = false,
       Value *EventHandle = nullptr, Value *Priority = nullptr);
 
   /// Generator for the taskgroup construct
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8cd31fd3e207c..397de485473ac 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2434,8 +2434,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
-    SmallVector<DependData> Dependencies, AffinityData Affinity, bool Mergeable,
-    Value *EventHandle, Value *Priority) {
+    SmallVector<DependData> Dependencies, SmallVector<AffinityData> Affinities,
+    bool Mergeable, Value *EventHandle, Value *Priority) {
 
   if (!updateToLocation(Loc))
     return InsertPointTy();
@@ -2481,8 +2481,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
       Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
 
   OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
-                      Affinity, Mergeable, Priority, EventHandle, TaskAllocaBB,
-                      ToBeDeleted](Function &OutlinedFn) mutable {
+                      Affinities, Mergeable, Priority, EventHandle,
+                      TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable {
     // Replace the Stale CI by appropriate RTL function call.
     assert(OutlinedFn.hasOneUse() &&
            "there must be a single user for the outlined function");
@@ -2555,20 +2555,16 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
                       /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
                       /*task_func=*/&OutlinedFn});
 
-    if (Affinity.Count && Affinity.Info) {
+    if (!Affinities.empty()) {
       Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
           OMPRTL___kmpc_omp_reg_task_with_affinity);
-
-      // bitcast to i8*
-      Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
-          Affinity.Info, Builder.getPtrTy(0));
-
-      if (!Affinity.Count->getType()->isIntegerTy(32))
-        Affinity.Count =
-            Builder.CreateTruncOrBitCast(Affinity.Count, Builder.getInt32Ty());
-
-      createRuntimeFunctionCall(
-          RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+      for (const auto &Affinity : Affinities) {
+        // bitcast to i8*
+        Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+            Affinity.Info, Builder.getPtrTy(0));
+        createRuntimeFunctionCall(
+            RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+      }
     }
 
     // Emit detach clause initialization.
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 97d96660405d0..4e521c5496413 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2460,7 +2460,7 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
 
   auto *cli = ompBuilder.createLoopSkeleton(
       builder.getCurrentDebugLocation(), iterInfo.getTotalTrips(),
-      builder.GetInsertBlock()->getParent(), contBB, contBB);
+      builder.GetInsertBlock()->getParent(), contBB, contBB, "iterator");
   builder.SetInsertPoint(curBB);
   builder.CreateBr(cli->getPreheader());
 
@@ -2518,6 +2518,30 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
   return mlir::success();
 }
 
+/// Collects one AffinityData per affinity source of \p taskOp into \p ads: the
+/// plain affinity variables (if any) first, then each iterated operand.
+static mlir::LogicalResult buildAffinityData(
+    mlir::omp::TaskOp &taskOp, llvm::IRBuilderBase &builder,
+    mlir::LLVM::ModuleTranslation &moduleTranslation,
+    llvm::SmallVectorImpl<llvm::OpenMPIRBuilder::AffinityData> &ads) {
+  if (!taskOp.getAffinityVars().empty()) {
+    llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
+    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+    ads.emplace_back(ad);
+  }
+  for (mlir::Value iterated : taskOp.getIterated()) {
+    auto iterOp = iterated.getDefiningOp<omp::IteratorsOp>();
+    if (!iterOp) // No defining iterators op: report it instead of crashing.
+      return taskOp.emitOpError() << "iterated affinity has no iterators op";
+    llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
+    if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
+                                             moduleTranslation, ad)))
+      return llvm::failure();
+    ads.emplace_back(ad);
+  }
+  return mlir::success();
+}
+
 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2632,17 +2656,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
           taskOp.getPrivateNeedsBarrier())))
     return llvm::failure();
 
-  llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
-  if (!taskOp.getAffinityVars().empty())
-    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
-  else if (!taskOp.getIterated().empty()) {
-    for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
-      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
-      if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
-                                               moduleTranslation, ad)))
-        return llvm::failure();
-    }
-  }
+  llvm::SmallVector<llvm::OpenMPIRBuilder::AffinityData> ads;
+  if (failed(buildAffinityData(taskOp, builder, moduleTranslation, ads)))
+    return llvm::failure();
 
   // Set up for call to createTask()
   builder.SetInsertPoint(taskStartBlock);
@@ -2747,7 +2763,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
       moduleTranslation.getOpenMPBuilder()->createTask(
           ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
           moduleTranslation.lookupValue(taskOp.getFinal()),
-          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad,
+          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ads,
           taskOp.getMergeable(),
           moduleTranslation.lookupValue(taskOp.getEventHandle()),
           moduleTranslation.lookupValue(taskOp.getPriority()));

>From 1338ac6c0d4f2d5fd1d4aac7b4ce4aab1d43e817 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 17:42:18 -0600
Subject: [PATCH 07/11] Move iterator loop generation logic to OMPIRBuilder

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |   7 +
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  61 ++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 140 +++++++++---------
 3 files changed, 135 insertions(+), 73 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 1c421f0dfce66..a85b34eaee4ce 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3932,6 +3932,13 @@ class OpenMPIRBuilder {
   LLVM_ABI GlobalVariable *
   getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
                               std::optional<unsigned> AddressSpace = {});
+
+  using IteratorBodyGenTy = llvm::function_ref<llvm::Error(
+      InsertPointTy BodyIP, llvm::Value *LinearIV)>;
+
+  LLVM_ABI InsertPointOrErrorTy createIteratorLoop(
+      LocationDescription Loc, llvm::Value *TripCount,
+      IteratorBodyGenTy BodyGen, llvm::StringRef Name = "iterator");
 };
 
 /// Class to represented the control flow structure of an OpenMP canonical loop.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 397de485473ac..a7fe01375556e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11546,6 +11546,67 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(vfs::FileSystem &VFS,
   loadOffloadInfoMetadata(*M.get());
 }
 
+/// Emits a \p TripCount-iteration loop at \p Loc, calling \p BodyGen with the
+/// body insertion point and induction variable to populate it. Returns the
+/// insertion point at the start of the continuation block or BodyGen's error.
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createIteratorLoop(
+    LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen,
+    llvm::StringRef Name) {
+  IRBuilderBase &B = Builder;
+  B.restoreIP(Loc.IP);
+
+  BasicBlock *CurBB = B.GetInsertBlock();
+  assert(CurBB &&
+         "expected a valid insertion block for creating an iterator loop");
+  Function *F = CurBB->getParent();
+
+  // splitBasicBlock() cannot split at end(): split at the terminator instead,
+  // creating one first if CurBB is still under construction.
+  BasicBlock::iterator SplitIt = B.GetInsertPoint();
+  if (SplitIt == CurBB->end()) {
+    if (!CurBB->getTerminator()) {
+      BasicBlock *TmpCont =
+          BasicBlock::Create(F->getContext(), "omp.it.tmp.cont", F);
+      B.SetInsertPoint(CurBB);
+      B.CreateBr(TmpCont);
+    }
+    SplitIt = CurBB->getTerminator()->getIterator();
+  }
+
+  BasicBlock *ContBB = CurBB->splitBasicBlock(SplitIt, "omp.it.cont");
+  // Remove the branch to ContBB since we will branch to ContBB after the loop
+  CurBB->getTerminator()->eraseFromParent();
+
+  CanonicalLoopInfo *CLI =
+      createLoopSkeleton(B.getCurrentDebugLocation(), TripCount, F,
+                         /*PreInsertBefore=*/ContBB,
+                         /*PostInsertBefore=*/ContBB, Name);
+
+  // Enter loop from original block.
+  B.SetInsertPoint(CurBB);
+  B.CreateBr(CLI->getPreheader());
+
+  // Remove the unconditional branch inserted by createLoopSkeleton in the body
+  if (Instruction *T = CLI->getBody()->getTerminator())
+    T->eraseFromParent();
+
+  if (llvm::Error Err = BodyGen(CLI->getBodyIP(), CLI->getIndVar()))
+    return Err;
+
+  // Ensure we end the loop body by jumping to the latch
+  if (!CLI->getBody()->getTerminator()) {
+    B.SetInsertPoint(CLI->getBody());
+    B.CreateBr(CLI->getLatch());
+  }
+
+  // Link After -> ContBB
+  B.SetInsertPoint(CLI->getAfter(), CLI->getAfter()->begin());
+  if (!CLI->getAfter()->getTerminator())
+    B.CreateBr(ContBB);
+
+  return InsertPointTy{ContBB, ContBB->begin()};
+}
+
 //===----------------------------------------------------------------------===//
 // OffloadEntriesInfoManager
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4e521c5496413..1cf040db00163 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2429,92 +2429,86 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
                               llvm::IRBuilderBase &builder,
                               mlir::LLVM::ModuleTranslation &moduleTranslation,
                               llvm::OpenMPIRBuilder::AffinityData &ad) {
-
   auto &ctx = builder.getContext();
-  auto &ompBuilder = *moduleTranslation.getOpenMPBuilder();
+  auto *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  if (!ompBuilder)
+    return itersOp.emitOpError() << "missing OpenMPIRBuilder";
+
   IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
 
   llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
       llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
       llvm::Type::getInt32Ty(ctx));
-  auto *list = builder.CreateAlloca(
-      kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
-
-  mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
-
-  llvm::Function *F = builder.GetInsertBlock()->getParent();
-  llvm::BasicBlock *curBB = builder.GetInsertBlock();
-  llvm::Instruction *splitPt = (builder.GetInsertPoint() == curBB->end())
-                                   ? curBB->getTerminator()
-                                   : &*builder.GetInsertPoint();
-  if (!splitPt) {
-    llvm::BasicBlock *tmp = llvm::BasicBlock::Create(ctx, "omp.tmp.cont", F);
-    builder.SetInsertPoint(curBB);
-    builder.CreateBr(tmp);
-    splitPt = curBB->getTerminator();
-  }
 
-  llvm::BasicBlock *contBB = curBB->splitBasicBlock(splitPt, "omp.task.cont");
-  // Remove the branch to contBB since we will branch to contBB after the loop
-  curBB->getTerminator()->eraseFromParent();
-
-  auto *cli = ompBuilder.createLoopSkeleton(
-      builder.getCurrentDebugLocation(), iterInfo.getTotalTrips(),
-      builder.GetInsertBlock()->getParent(), contBB, contBB, "iterator");
-  builder.SetInsertPoint(curBB);
-  builder.CreateBr(cli->getPreheader());
-
-  // Remove the unconditional branch inserted by createLoopSkeleton in the body
-  if (llvm::Instruction *T = cli->getBody()->getTerminator())
-    T->eraseFromParent();
-
-  // Start building the loop body
-  builder.SetInsertPoint(cli->getBody());
-
-  llvm::Value *linearIV = cli->getIndVar();
-  for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
-    llvm::Value *trip = iterInfo.getTrips()[d];
-    // idx = linearIV % trips[d]
-    llvm::Value *idx = builder.CreateURem(linearIV, trip);
-    // linearIV = linearIV / trips[d]
-    linearIV = builder.CreateUDiv(linearIV, trip);
-
-    // physicalIV = lb + logical * step.
-    llvm::Value *physicalIV = builder.CreateAdd(
-        iterInfo.getLowerBounds()[d],
-        builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv");
-
-    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physicalIV);
-  }
+  auto *list = builder.CreateAlloca(kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(),
+                                   "omp.affinity_list");
 
-  moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
-  if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
-                                                  /*ignoreArguments=*/true,
-                                                  builder))) {
-    return itersOp.emitOpError() << "failed to translate iterators region";
-  }
+  mlir::Region &itersRegion = itersOp.getRegion();
+  mlir::Block &iteratorRegionBlock = itersRegion.front();
 
-  auto yield =
-      mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
-  auto entryOp =
-      yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
-  llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
-  llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
-  storeAffinityEntry(builder, list, cli->getIndVar(), addr, len);
+  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
 
-  // Ensure we end the loop body by jumping to the latch
-  if (!builder.GetInsertBlock()->getTerminator())
-    builder.CreateBr(cli->getLatch());
+  // Build the iterator loop using the new OMPIRBuilder helper.
+  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy bodyIP,
+                     llvm::Value *linearIV) -> llvm::Error {
+    llvm::IRBuilderBase::InsertPointGuard g(builder);
+    builder.restoreIP(bodyIP);
+
+    // Unflatten linearIV into per-dimension logical indices (row-major).
+    llvm::Value *tmp = linearIV;
+    for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
+      llvm::Value *trip = iterInfo.getTrips()[d];
+      // idx_d = tmp % trip_d
+      llvm::Value *idx = builder.CreateURem(tmp, trip, "omp.it.idx");
+      // tmp = tmp / trip_d
+      tmp = builder.CreateUDiv(tmp, trip, "omp.it.lin.next");
+
+      // physIV_d = lb_d + idx_d * step_d
+      llvm::Value *physIV = builder.CreateAdd(
+          iterInfo.getLowerBounds()[d],
+          builder.CreateMul(idx, iterInfo.getSteps()[d]),
+          "omp.it.phys_iv");
+
+      moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV);
+    }
 
-  moduleTranslation.forgetMapping(itersOp.getRegion());
+    // Translate the iterator region into the loop body.
+    moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
+    if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
+                                                    /*ignoreArguments=*/true,
+                                                    builder))) {
+      return llvm::make_error<llvm::StringError>(
+          "failed to translate iterators region",
+          llvm::inconvertibleErrorCode());
+    }
 
-  builder.SetInsertPoint(cli->getAfter(), cli->getAfter()->begin());
-  builder.CreateBr(contBB);
-  builder.SetInsertPoint(contBB, contBB->begin());
+    // Extract affinity entry from omp.yield and store into list[linearIV].
+    auto yield = mlir::dyn_cast<mlir::omp::YieldOp>(
+        iteratorRegionBlock.getTerminator());
+    auto entryOp =
+        yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
+
+    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+    llvm::Value *len  = moduleTranslation.lookupValue(entryOp.getLen());
+
+    storeAffinityEntry(builder, list, linearIV, addr, len);
+
+    // Avoid leaking region mappings if this iterator loop is reused/expanded.
+    moduleTranslation.forgetMapping(itersRegion);
+
+    return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder->createIteratorLoop(loc, iterInfo.getTotalTrips(), bodyGen,
+                                     /*Name=*/"iterator");
+  if (!afterIP)
+    return itersOp.emitOpError() << llvm::toString(afterIP.takeError());
+
+  builder.restoreIP(*afterIP);
 
-  ad.Info = list;
-  ad.Count =
-      builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+  ad.Info  = list;
+  ad.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
   return mlir::success();
 }
 

>From f641cad62c56c16e4908cdd9deb4d377949f0ee4 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 17:51:09 -0600
Subject: [PATCH 08/11] Fix tests

---
 llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp |  8 ++++----
 mlir/test/Target/LLVMIR/openmp-todo.mlir        | 12 ------------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 11f03d2a06d6e..8292b055f6cd9 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -7586,9 +7586,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
   Builder.CreateStore(Builder.getInt32(0),
                       Builder.CreateStructGEP(AffInfoTy, Entry0, 2));
 
-  OpenMPIRBuilder::AffinityData Affinity;
-  Affinity.Count = CountI32;
-  Affinity.Info = AffArr;
+  SmallVector<OpenMPIRBuilder::AffinityData> Affinities;
+  OpenMPIRBuilder::AffinityData Affinity{CountI32, AffArr};
+  Affinities.push_back(Affinity);
 
   BasicBlock *AllocaBB = Builder.GetInsertBlock();
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
@@ -7604,7 +7604,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
           /*Final=*/nullptr,
           /*IfCondition=*/nullptr,
           /*Dependencies=*/{},
-          /*Affinity=*/Affinity,
+          /*Affinity=*/Affinities,
           /*Mergeable=*/false,
           /*EventHandle=*/nullptr,
           /*Priority=*/nullptr));
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index ae02b5878f763..c0f43f27e6b0f 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -473,15 +473,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
   }
   llvm.return
 }
-
-// -----
-llvm.func @task_affinity(%ptr : !llvm.ptr, %len : i64) {
-  // expected-error at below {{not yet implemented: omp.affinity_entry}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.affinity_entry}}
-  %ae = omp.affinity_entry %ptr, %len
-    : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
-  omp.task affinity(%ae : !omp.affinity_entry_ty<!llvm.ptr, i64>) {
-    omp.terminator
-  }
-  llvm.return
-}

>From f1bde7aebc6a33fc41b5086887938270d3ee3cf7 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 18:13:47 -0600
Subject: [PATCH 09/11] Extract iterator loop body conversion logic

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 64 +++++++++++--------
 1 file changed, 37 insertions(+), 27 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1cf040db00163..78ea31a626033 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2424,6 +2424,37 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
   ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
 }
 
+static mlir::LogicalResult
+convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
+                      mlir::Block &iteratorRegionBlock,
+                      llvm::IRBuilderBase &builder,
+                      LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::Value *tmp = linearIV;
+  for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
+    llvm::Value *trip = iterInfo.getTrips()[d];
+    // idx_d = tmp % trip_d
+    llvm::Value *idx = builder.CreateURem(tmp, trip, "omp.it.idx");
+    // tmp = tmp / trip_d
+    tmp = builder.CreateUDiv(tmp, trip, "omp.it.lin.next");
+
+    // physIV_d = lb_d + idx_d * step_d
+    llvm::Value *physIV = builder.CreateAdd(
+        iterInfo.getLowerBounds()[d],
+        builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv");
+
+    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV);
+  }
+
+  // Translate the iterator region into the loop body.
+  moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
+  if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
+                                                  /*ignoreArguments=*/true,
+                                                  builder))) {
+    return mlir::failure();
+  }
+  return mlir::success();
+}
+
 static mlir::LogicalResult
 buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
                               llvm::IRBuilderBase &builder,
@@ -2454,43 +2485,22 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
     llvm::IRBuilderBase::InsertPointGuard g(builder);
     builder.restoreIP(bodyIP);
 
-    // Unflatten linearIV into per-dimension logical indices (row-major).
-    llvm::Value *tmp = linearIV;
-    for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
-      llvm::Value *trip = iterInfo.getTrips()[d];
-      // idx_d = tmp % trip_d
-      llvm::Value *idx = builder.CreateURem(tmp, trip, "omp.it.idx");
-      // tmp = tmp / trip_d
-      tmp = builder.CreateUDiv(tmp, trip, "omp.it.lin.next");
-
-      // physIV_d = lb_d + idx_d * step_d
-      llvm::Value *physIV = builder.CreateAdd(
-          iterInfo.getLowerBounds()[d],
-          builder.CreateMul(idx, iterInfo.getSteps()[d]),
-          "omp.it.phys_iv");
-
-      moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV);
-    }
-
-    // Translate the iterator region into the loop body.
-    moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
-    if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
-                                                    /*ignoreArguments=*/true,
-                                                    builder))) {
+    if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock,
+                                     builder, moduleTranslation))) {
       return llvm::make_error<llvm::StringError>(
-          "failed to translate iterators region",
-          llvm::inconvertibleErrorCode());
+          "failed to convert iterators region", llvm::inconvertibleErrorCode());
     }
 
     // Extract affinity entry from omp.yield and store into list[linearIV].
     auto yield = mlir::dyn_cast<mlir::omp::YieldOp>(
         iteratorRegionBlock.getTerminator());
+    assert(yield.getResults().size() == 1 &&
+           "expect omp.yield in iterator region to have one result");
     auto entryOp =
         yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
 
     llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
-    llvm::Value *len  = moduleTranslation.lookupValue(entryOp.getLen());
-
+    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
     storeAffinityEntry(builder, list, linearIV, addr, len);
 
     // Avoid leaking region mappings if this iterator loop is reused/expanded.

>From 0d477947c2bbed4206325c81ca25bf5bd615e2fe Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 19:24:31 -0600
Subject: [PATCH 10/11] Refactor buildAffinityData by hoisting the creation of
 affinity_list

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |   7 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 101 ++++++++----------
 2 files changed, 49 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index a7fe01375556e..229f5627b7552 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2559,11 +2559,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
       Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
           OMPRTL___kmpc_omp_reg_task_with_affinity);
       for (const auto &Affinity : Affinities) {
-        // bitcast to i8*
-        Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
-            Affinity.Info, Builder.getPtrTy(0));
-        createRuntimeFunctionCall(
-            RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+        createRuntimeFunctionCall(RegAffFn, {Ident, ThreadID, TaskData,
+                                             Affinity.Count, Affinity.Info});
       }
     }
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 78ea31a626033..aa4fa2c9ca284 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2392,24 +2392,13 @@ static void storeAffinityEntry(llvm::IRBuilderBase &builder,
                       builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2));
 }
 
-static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
-                                  llvm::IRBuilderBase &builder,
-                                  LLVM::ModuleTranslation &moduleTranslation,
-                                  llvm::OpenMPIRBuilder::AffinityData &ad) {
-  auto &ctx = builder.getContext();
+static void fillAffinityLocators(mlir::omp::TaskOp taskOp,
+                                 llvm::IRBuilderBase &builder,
+                                 LLVM::ModuleTranslation &moduleTranslation,
+                                 llvm::Value *affinityList) {
   SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
                                         taskOp.getAffinityVars().end());
 
-  // Define the type locally just for the Alloca
-  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
-      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
-      llvm::Type::getInt32Ty(ctx));
-
-  llvm::Value *count =
-      llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
-  llvm::AllocaInst *affinityList =
-      builder.CreateAlloca(kmpTaskAffinityInfoTy, count, "omp.affinity_list");
-
   for (unsigned i = 0; i < affinityVars.size(); ++i) {
     auto entryOp = affinityVars[i].getDefiningOp<mlir::omp::AffinityEntryOp>();
     assert(entryOp && "affinity item must be omp.affinity_entry");
@@ -2419,9 +2408,6 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
     llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
     storeAffinityEntry(builder, affinityList, builder.getInt64(i), addr, len);
   }
-
-  ad.Info = affinityList;
-  ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
 }
 
 static mlir::LogicalResult
@@ -2433,9 +2419,9 @@ convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
   for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
     llvm::Value *trip = iterInfo.getTrips()[d];
     // idx_d = tmp % trip_d
-    llvm::Value *idx = builder.CreateURem(tmp, trip, "omp.it.idx");
+    llvm::Value *idx = builder.CreateURem(tmp, trip);
     // tmp = tmp / trip_d
-    tmp = builder.CreateUDiv(tmp, trip, "omp.it.lin.next");
+    tmp = builder.CreateUDiv(tmp, trip);
 
     // physIV_d = lb_d + idx_d * step_d
     llvm::Value *physIV = builder.CreateAdd(
@@ -2456,24 +2442,10 @@ convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
 }
 
 static mlir::LogicalResult
-buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
-                              llvm::IRBuilderBase &builder,
-                              mlir::LLVM::ModuleTranslation &moduleTranslation,
-                              llvm::OpenMPIRBuilder::AffinityData &ad) {
-  auto &ctx = builder.getContext();
-  auto *ompBuilder = moduleTranslation.getOpenMPBuilder();
-  if (!ompBuilder)
-    return itersOp.emitOpError() << "missing OpenMPIRBuilder";
-
-  IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
-
-  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
-      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
-      llvm::Type::getInt32Ty(ctx));
-
-  auto *list = builder.CreateAlloca(kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(),
-                                   "omp.affinity_list");
-
+fillAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
+                         llvm::IRBuilderBase &builder,
+                         mlir::LLVM::ModuleTranslation &moduleTranslation,
+                         llvm::Value *affinityList, IteratorInfo &iterInfo) {
   mlir::Region &itersRegion = itersOp.getRegion();
   mlir::Block &iteratorRegionBlock = itersRegion.front();
 
@@ -2492,8 +2464,8 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
     }
 
     // Extract affinity entry from omp.yield and store into list[linearIV].
-    auto yield = mlir::dyn_cast<mlir::omp::YieldOp>(
-        iteratorRegionBlock.getTerminator());
+    auto yield =
+        mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
     assert(yield.getResults().size() == 1 &&
            "expect omp.yield in iterator region to have one result");
     auto entryOp =
@@ -2501,7 +2473,7 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
 
     llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
     llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
-    storeAffinityEntry(builder, list, linearIV, addr, len);
+    storeAffinityEntry(builder, affinityList, linearIV, addr, len);
 
     // Avoid leaking region mappings if this iterator loop is reused/expanded.
     moduleTranslation.forgetMapping(itersRegion);
@@ -2510,15 +2482,14 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
   };
 
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
-      ompBuilder->createIteratorLoop(loc, iterInfo.getTotalTrips(), bodyGen,
-                                     /*Name=*/"iterator");
+      moduleTranslation.getOpenMPBuilder()->createIteratorLoop(
+          loc, iterInfo.getTotalTrips(), bodyGen,
+          /*Name=*/"iterator");
   if (!afterIP)
     return itersOp.emitOpError() << llvm::toString(afterIP.takeError());
 
   builder.restoreIP(*afterIP);
 
-  ad.Info  = list;
-  ad.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
   return mlir::success();
 }
 
@@ -2526,21 +2497,43 @@ static mlir::LogicalResult buildAffinityData(
     mlir::omp::TaskOp &taskOp, llvm::IRBuilderBase &builder,
     mlir::LLVM::ModuleTranslation &moduleTranslation,
     llvm::SmallVectorImpl<llvm::OpenMPIRBuilder::AffinityData> &ads) {
-  using AffinityData = llvm::OpenMPIRBuilder::AffinityData;
+  auto &ctx = builder.getContext();
+  llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+      llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+      llvm::Type::getInt32Ty(ctx));
+
+  auto allocateAffinityList = [&](llvm::Value *count) -> llvm::Value * {
+    return builder.CreateAlloca(kmpTaskAffinityInfoTy, count,
+                                "omp.affinity_list");
+  };
+
+  auto createAffinity =
+      [&](llvm::Value *count,
+          llvm::Value *info) -> llvm::OpenMPIRBuilder::AffinityData {
+    llvm::OpenMPIRBuilder::AffinityData ad{};
+    ad.Count = builder.CreateTrunc(count, builder.getInt32Ty());
+    ad.Info =
+        builder.CreatePointerBitCastOrAddrSpaceCast(info, builder.getPtrTy(0));
+    return ad;
+  };
 
   if (!taskOp.getAffinityVars().empty()) {
-    AffinityData ad;
-    buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
-    ads.emplace_back(ad);
+    llvm::Value *count = llvm::ConstantInt::get(
+        builder.getInt64Ty(), taskOp.getAffinityVars().size());
+    llvm::Value *list = allocateAffinityList(count);
+    fillAffinityLocators(taskOp, builder, moduleTranslation, list);
+    ads.emplace_back(createAffinity(count, list));
   }
+
   if (!taskOp.getIterated().empty()) {
     for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
-      auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
-      AffinityData ad;
-      if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
-                                               moduleTranslation, ad)))
+      auto itersOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+      IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+      llvm::Value *list = allocateAffinityList(iterInfo.getTotalTrips());
+      if (failed(fillAffinityIteratorLoop(itersOp, builder, moduleTranslation,
+                                          list, iterInfo)))
         return llvm::failure();
-      ads.emplace_back(ad);
+      ads.emplace_back(createAffinity(iterInfo.getTotalTrips(), list));
     }
   }
   return mlir::success();

>From c791eabd8a09d98585125b7ae1efd9c8e47c3cfb Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 25 Feb 2026 14:08:13 -0600
Subject: [PATCH 11/11] Fix iteratorop

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index aa4fa2c9ca284..87981fb3af4b0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2252,10 +2252,10 @@ class IteratorInfo {
   }
 
 public:
-  IteratorInfo(mlir::omp::IteratorsOp itersOp,
+  IteratorInfo(mlir::omp::IteratorOp itersOp,
                mlir::LLVM::ModuleTranslation &moduleTranslation,
                llvm::IRBuilderBase &builder) {
-    dims = itersOp.getLbs().size();
+    dims = itersOp.getLoopLowerBounds().size();
     this->lowerBounds.resize(dims);
     this->upperBounds.resize(dims);
     this->steps.resize(dims);
@@ -2263,13 +2263,13 @@ class IteratorInfo {
 
     for (unsigned d = 0; d < dims; ++d) {
       llvm::Value *lb =
-          lookUpAsI64(itersOp.getLbs()[d], moduleTranslation, builder);
+          lookUpAsI64(itersOp.getLoopLowerBounds()[d], moduleTranslation, builder);
       llvm::Value *ub =
-          lookUpAsI64(itersOp.getUbs()[d], moduleTranslation, builder);
+          lookUpAsI64(itersOp.getLoopUpperBounds()[d], moduleTranslation, builder);
       llvm::Value *st =
-          lookUpAsI64(itersOp.getSteps()[d], moduleTranslation, builder);
+          lookUpAsI64(itersOp.getLoopSteps()[d], moduleTranslation, builder);
       assert(lb && ub && st &&
-             "Expect lowerBounds, upperBounds, and steps in IteratorsOp");
+             "Expect lowerBounds, upperBounds, and steps in IteratorOp");
 
       this->lowerBounds[d] = lb;
       this->upperBounds[d] = ub;
@@ -2442,7 +2442,7 @@ convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
 }
 
 static mlir::LogicalResult
-fillAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
+fillAffinityIteratorLoop(mlir::omp::IteratorOp itersOp,
                          llvm::IRBuilderBase &builder,
                          mlir::LLVM::ModuleTranslation &moduleTranslation,
                          llvm::Value *affinityList, IteratorInfo &iterInfo) {
@@ -2460,7 +2460,7 @@ fillAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
     if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock,
                                      builder, moduleTranslation))) {
       return llvm::make_error<llvm::StringError>(
-          "failed to convert iterators region", llvm::inconvertibleErrorCode());
+          "failed to convert iterator region", llvm::inconvertibleErrorCode());
     }
 
     // Extract affinity entry from omp.yield and store into list[linearIV].
@@ -2527,7 +2527,7 @@ static mlir::LogicalResult buildAffinityData(
 
   if (!taskOp.getIterated().empty()) {
     for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
-      auto itersOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+      auto itersOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorOp>();
       IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
       llvm::Value *list = allocateAffinityList(iterInfo.getTotalTrips());
       if (failed(fillAffinityIteratorLoop(itersOp, builder, moduleTranslation,
@@ -7525,7 +7525,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
             return convertOmpLoopNest(*op, builder, moduleTranslation);
           })
           .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
-                omp::AffinityEntryOp, omp::IteratorsOp>([&](auto op) {
+                omp::AffinityEntryOp, omp::IteratorOp>([&](auto op) {
             // No-op, should be handled by relevant owning operations e.g.
             // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
             // etc. and then discarded



More information about the llvm-branch-commits mailing list