[llvm-branch-commits] [flang] [llvm] [mlir] [WIP][mlir][llvmir][OpenMP] Translate affinity clause in task construct to llvmir (PR #182223)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Feb 25 12:51:23 PST 2026
https://github.com/chichunchen updated https://github.com/llvm/llvm-project/pull/182223
>From f30db773091041afdb375168de091df255ea5ba3 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 13:25:17 -0600
Subject: [PATCH 01/11] Emit omp.iterator in affinity clause from Flang
---
flang/lib/Lower/OpenMP/Utils.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 01aeb0ef58bfd..5834d7019ecb7 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -217,6 +217,15 @@ mlir::Value genIteratorCoordinate(Fortran::lower::AbstractConverter &converter,
llvm::ArrayRef<mlir::Value> ivs,
mlir::Location loc);
+bool hasIVReference(
+ const omp::Object &object,
+ const llvm::SmallPtrSetImpl<const Fortran::semantics::Symbol *> &ivSyms);
+
+mlir::Value genIteratorCoordinate(Fortran::lower::AbstractConverter &converter,
+ mlir::Value base,
+ llvm::ArrayRef<mlir::Value> ivs,
+ mlir::Location loc);
+
} // namespace omp
} // namespace lower
} // namespace Fortran
>From f5645f2565a6a0876e251de522186408feca92e6 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 14:15:02 -0600
Subject: [PATCH 02/11] Add iterator test and remove redundant check lines
---
mlir/test/Dialect/OpenMP/ops.mlir | 53 +++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index b908874c2010b..febf16788ee4a 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3654,3 +3654,56 @@ func.func @omp_task_affinity_iterator_2d(%lb0 : index, %ub0 : index, %st0 : inde
return
}
+
+// CHECK-LABEL: func.func @omp_task_affinity_iterator_1d(
+func.func @omp_task_affinity_iterator_1d(%lb : index, %ub : index, %step : index,
+ %addr : !llvm.ptr, %len : i64) -> () {
+ // CHECK: %[[IT:.*]] = omp.iterators(%[[IV:.*]]: index) = (%[[LB:.*]] to %[[UB:.*]] step %[[ST:.*]]) {
+ // CHECK: %[[E:.*]] = omp.affinity_entry %[[ADDR:.*]], %[[LEN:.*]] : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ // CHECK: omp.yield(%[[E]] : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ // CHECK: } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+ // CHECK: omp.task affinity(%[[IT]] : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ // CHECK: }
+ %it = omp.iterators(%iv: index) = (%lb to %ub step %step) {
+ %e = omp.affinity_entry %addr, %len
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ omp.yield(%e : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+ omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ omp.terminator
+ }
+
+ return
+}
+
+// CHECK-LABEL: func.func @omp_task_affinity_iterator_2d(
+func.func @omp_task_affinity_iterator_2d(%lb0 : index, %ub0 : index, %st0 : index,
+ %lb1 : index, %ub1 : index, %st1 : index,
+ %addr0 : !llvm.ptr, %addr1 : !llvm.ptr,
+ %len0 : i64, %len1 : i64) -> () {
+ // CHECK: %[[IT:.*]] = omp.iterators(%[[I:.*]]: index, %[[J:.*]]: index) = (%[[LB0:.*]] to %[[UB0:.*]] step %[[ST0:.*]], %[[LB1:.*]] to %[[UB1:.*]] step %[[ST1:.*]]) {
+ // CHECK: %[[E0:.*]] = omp.affinity_entry %[[A0:.*]], %[[L0:.*]] : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ // CHECK: %[[E1:.*]] = omp.affinity_entry %[[A1:.*]], %[[L1:.*]] : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ // CHECK: omp.yield(%[[E1]] : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ // CHECK: } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+ // CHECK: omp.task affinity(%[[IT]] : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ // CHECK: }
+ %it = omp.iterators(%i: index, %j: index) = (%lb0 to %ub0 step %st0, %lb1 to %ub1 step %st1) {
+ %use_i = arith.addi %i, %lb0 : index
+ %use_j = arith.addi %j, %lb1 : index
+ %_ = arith.cmpi ult, %use_i, %use_j : index
+
+ %e0 = omp.affinity_entry %addr0, %len0
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ %e1 = omp.affinity_entry %addr1, %len1
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+
+ omp.yield(%e1 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+ omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ omp.terminator
+ }
+
+ return
+}
>From 3d1f96555b571e34f02b06d8141f7cd0b966af77 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 15:20:40 -0600
Subject: [PATCH 03/11] [mlir][llvmir][OpenMP] Translate affinity clause in
task construct to llvmir
Translate affinity entries to LLVM IR by passing the affinity information to
createTask (the call to __kmpc_omp_reg_task_with_affinity is emitted inside PostOutlineCB).
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 18 ++--
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 22 ++++-
.../Frontend/OpenMPIRBuilderTest.cpp | 92 +++++++++++++++++++
.../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 2 +
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 72 ++++++++++++---
5 files changed, 184 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 9885ffc8b2065..deb2535ac843f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1510,6 +1510,12 @@ class OpenMPIRBuilder {
: DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
};
+ /// A struct to pack the relevant information for an OpenMP affinity clause.
+ /// Both members default to null; createTask only registers affinity
+ /// information with the runtime when both are set.
+ struct AffinityData {
+ /// Number of kmp_task_affinity_info_t entries pointed to by Info.
+ Value *Count = nullptr;
+ /// Pointer to the first kmp_task_affinity_info_t entry.
+ Value *Info = nullptr;
+ };
+
/// Generator for `#omp taskloop`
///
/// \param Loc The location where the taskloop construct was encountered.
@@ -1573,12 +1579,12 @@ class OpenMPIRBuilder {
/// \param Mergeable If the given task is `mergeable`
/// \param priority `priority-value' specifies the execution order of the
/// tasks that is generated by the construct
- LLVM_ABI InsertPointOrErrorTy
- createTask(const LocationDescription &Loc, InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB, bool Tied = true,
- Value *Final = nullptr, Value *IfCondition = nullptr,
- SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
- Value *EventHandle = nullptr, Value *Priority = nullptr);
+ LLVM_ABI InsertPointOrErrorTy createTask(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr,
+ Value *IfCondition = nullptr, SmallVector<DependData> Dependencies = {},
+ AffinityData Affinity = {}, bool Mergeable = false,
+ Value *EventHandle = nullptr, Value *Priority = nullptr);
/// Generator for the taskgroup construct
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 6775674d733fe..8cd31fd3e207c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2434,8 +2434,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
- SmallVector<DependData> Dependencies, bool Mergeable, Value *EventHandle,
- Value *Priority) {
+ SmallVector<DependData> Dependencies, AffinityData Affinity, bool Mergeable,
+ Value *EventHandle, Value *Priority) {
if (!updateToLocation(Loc))
return InsertPointTy();
@@ -2481,7 +2481,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
- Mergeable, Priority, EventHandle, TaskAllocaBB,
+ Affinity, Mergeable, Priority, EventHandle, TaskAllocaBB,
ToBeDeleted](Function &OutlinedFn) mutable {
// Replace the Stale CI by appropriate RTL function call.
assert(OutlinedFn.hasOneUse() &&
@@ -2555,6 +2555,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
/*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
/*task_func=*/&OutlinedFn});
+ // Register the affinity list with the runtime right after the task has
+ // been allocated; skipped unless both the count and the list are given.
+ if (Affinity.Count && Affinity.Info) {
+ Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
+ OMPRTL___kmpc_omp_reg_task_with_affinity);
+
+ // The list is handed to the runtime as a pointer in address space 0.
+ Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Affinity.Info, Builder.getPtrTy(0));
+
+ // The entry count is passed as i32. CreateZExtOrTrunc is a no-op for i32
+ // and, unlike CreateTruncOrBitCast, also accepts narrower integers.
+ Value *AffCount =
+ Builder.CreateZExtOrTrunc(Affinity.Count, Builder.getInt32Ty());
+
+ createRuntimeFunctionCall(RegAffFn,
+ {Ident, ThreadID, TaskData, AffCount, AffPtr});
+ }
+
// Emit detach clause initialization.
// evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
// task_descriptor);
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 200dda84b13f5..11f03d2a06d6e 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -7555,6 +7555,98 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
EXPECT_EQ(OulinedFnCall->getNextNode(), TaskCompleteCall);
}
+TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.Config.IsTargetDevice = false;
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ return Error::success();
+ };
+
+ LLVMContext &Ctx = M->getContext();
+ StructType *AffInfoTy = StructType::get(
+ Type::getInt64Ty(Ctx), Type::getInt64Ty(Ctx), Type::getInt32Ty(Ctx));
+
+ // Allocate a one-element list of AffInfoTy to pass as the affinity info.
+ Value *CountI32 = Builder.getInt32(1);
+ AllocaInst *AffArr =
+ Builder.CreateAlloca(AffInfoTy, Builder.getInt64(1), "omp.affinity_list");
+
+ // Store placeholder values into the three fields (i64, i64, i32) of entry 0.
+ Value *Entry0 = Builder.CreateInBoundsGEP(
+ AffInfoTy, AffArr, Builder.getInt64(0), "omp.affinity.entry");
+ Builder.CreateStore(Builder.getInt64(0),
+ Builder.CreateStructGEP(AffInfoTy, Entry0, 0));
+ Builder.CreateStore(Builder.getInt64(64),
+ Builder.CreateStructGEP(AffInfoTy, Entry0, 1));
+ Builder.CreateStore(Builder.getInt32(0),
+ Builder.CreateStructGEP(AffInfoTy, Entry0, 2));
+
+ OpenMPIRBuilder::AffinityData Affinity;
+ Affinity.Count = CountI32;
+ Affinity.Info = AffArr;
+
+ BasicBlock *AllocaBB = Builder.GetInsertBlock();
+ BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
+ OpenMPIRBuilder::LocationDescription Loc(
+ InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
+
+ ASSERT_EXPECTED_INIT(
+ OpenMPIRBuilder::InsertPointTy, AfterIP,
+ OMPBuilder.createTask(
+ Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
+ BodyGenCB,
+ /*Tied=*/true,
+ /*Final=*/nullptr,
+ /*IfCondition=*/nullptr,
+ /*Dependencies=*/{},
+ /*Affinity=*/Affinity,
+ /*Mergeable=*/false,
+ /*EventHandle=*/nullptr,
+ /*Priority=*/nullptr));
+
+ Builder.restoreIP(AfterIP);
+ OMPBuilder.finalize();
+ Builder.CreateRetVoid();
+
+ EXPECT_FALSE(verifyModule(*M, &errs()));
+
+ Function *TaskAllocFn =
+ OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+ Function *RegAffFn = OMPBuilder.getOrCreateRuntimeFunctionPtr(
+ OMPRTL___kmpc_omp_reg_task_with_affinity);
+
+ CallInst *TaskAllocCI = nullptr;
+ CallInst *RegAffCI = nullptr;
+
+ for (auto &I : instructions(F)) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->getCalledFunction() == TaskAllocFn)
+ TaskAllocCI = CI;
+ if (CI->getCalledFunction() == RegAffFn)
+ RegAffCI = CI;
+ }
+ }
+
+ ASSERT_NE(TaskAllocCI, nullptr) << "expected __kmpc_omp_task_alloc call";
+ ASSERT_NE(RegAffCI, nullptr)
+ << "expected __kmpc_omp_reg_task_with_affinity call";
+
+ // Check the operands of the call against the expected signature:
+ // i32 __kmpc_omp_reg_task_with_affinity(ident_t*, i32 gtid,
+ // kmp_task_t*, i32 naffins,
+ // kmp_task_affinity_info_t*)
+ ASSERT_EQ(RegAffCI->arg_size(), 5u);
+ // naffins must be an i32
+ EXPECT_TRUE(RegAffCI->getArgOperand(3)->getType()->isIntegerTy(32));
+ // the kmp_task_affinity_info_t* operand must be a pointer
+ EXPECT_TRUE(RegAffCI->getArgOperand(4)->getType()->isPointerTy());
+}
+
TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 7fdc23adc8573..e6242e5f06418 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -154,6 +154,8 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
// discarded on lowering to LLVM-IR from the OpenMP dialect.
converter.addConversion(
[&](omp::MapBoundsType type) -> Type { return type; });
+ converter.addConversion(
+ [&](omp::AffinityEntryType type) -> Type { return type; });
// Add conversions for all OpenMP operations.
addOpenMPOpConversions<
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 38c5802ed60ed..4949eae218c6e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -321,10 +321,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
<< " operation";
};
- auto checkAffinity = [&todo](auto op, LogicalResult &result) {
- if (!op.getAffinityVars().empty())
- result = todo("affinity");
- };
auto checkAllocate = [&todo](auto op, LogicalResult &result) {
if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
result = todo("allocate");
@@ -413,7 +409,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkThreadLimit(op, result);
})
.Case([&](omp::TaskOp op) {
- checkAffinity(op, result);
checkAllocate(op, result);
checkInReduction(op, result);
})
@@ -2308,6 +2303,53 @@ void TaskContextStructManager::freeStructPtr() {
builder.CreateFree(structPtr);
}
+static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
+ llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::AffinityData &ad) {
+ auto &ctx = builder.getContext();
+ llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+ llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+ llvm::Type::getInt32Ty(ctx));
+
+ SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
+ taskOp.getAffinityVars().end());
+
+ // Allocate [N x kmp_task_affinity_info_t]
+ llvm::Value *count =
+ llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
+ llvm::AllocaInst *affinityList =
+ builder.CreateAlloca(kmpTaskAffinityInfoTy, count, "omp.affinity_list");
+
+ for (unsigned i = 0; i < affinityVars.size(); ++i) {
+ auto entryOp = affinityVars[i].getDefiningOp<mlir::omp::AffinityEntryOp>();
+ assert(entryOp && "affinity item must be omp.affinity_entry");
+
+ llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+ assert(addr && "expect affinity addr to be non-null");
+ llvm::Value *baseAddr = builder.CreatePtrToInt(addr, builder.getInt64Ty());
+ llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+ llvm::Value *flags = builder.getInt32(0);
+
+ llvm::Value *entry =
+ builder.CreateInBoundsGEP(kmpTaskAffinityInfoTy, affinityList,
+ builder.getInt64(i), "omp.affinity.entry");
+
+ llvm::Value *gep0 =
+ builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0); // base_addr
+ llvm::Value *gep1 =
+ builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1); // len
+ llvm::Value *gep2 =
+ builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2); // flags (i32)
+
+ builder.CreateStore(baseAddr, gep0);
+ builder.CreateStore(len, gep1);
+ builder.CreateStore(flags, gep2);
+ }
+ ad.Info = affinityList;
+ ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
+}
+
/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2520,12 +2562,16 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
moduleTranslation, dds);
+ llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
+ if (!taskOp.getAffinityVars().empty())
+ buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
moduleTranslation.getOpenMPBuilder()->createTask(
ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
moduleTranslation.lookupValue(taskOp.getFinal()),
- moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
+ moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad,
taskOp.getMergeable(),
moduleTranslation.lookupValue(taskOp.getEventHandle()),
moduleTranslation.lookupValue(taskOp.getPriority()));
@@ -7289,13 +7335,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::LoopNestOp) {
return convertOmpLoopNest(*op, builder, moduleTranslation);
})
- .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
- [&](auto op) {
- // No-op, should be handled by relevant owning operations e.g.
- // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
- // etc. and then discarded
- return success();
- })
+ .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
+ omp::AffinityEntryOp>([&](auto op) {
+ // No-op, should be handled by relevant owning operations e.g.
+ // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
+ // etc. and then discarded
+ return success();
+ })
.Case([&](omp::NewCliOp op) {
// Meta-operation: Doesn't do anything by itself, but used to
// identify a loop.
>From 8d272556ad61ffef78760fa8d17f8190e533c76d Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 18 Feb 2026 18:25:34 -0600
Subject: [PATCH 04/11] Implement lowering for omp.iterator in affinity
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 27 +++
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 82 ++++++++
.../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 1 +
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 180 +++++++++++++++---
4 files changed, 268 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index deb2535ac843f..0332930eafedf 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4293,6 +4293,33 @@ class ScanInfo {
~ScanInfo() { delete (ScanBuffPtrs); }
};
+class IteratorLoopNestScope {
+private:
+ llvm::IRBuilderBase &B;
+ llvm::LLVMContext &Ctx;
+
+ unsigned Dims = 0;
+ llvm::ArrayRef<llvm::Value *> LowerBounds;
+ llvm::ArrayRef<llvm::Value *> UpperBounds;
+ llvm::ArrayRef<llvm::Value *> Steps;
+
+ llvm::SmallVector<llvm::PHINode *> IVs;
+ llvm::SmallVector<llvm::BasicBlock *> HdrBBs;
+ llvm::SmallVector<llvm::BasicBlock *> BodyBBs;
+ llvm::SmallVector<llvm::BasicBlock *> LatchBBs;
+ llvm::SmallVector<llvm::BasicBlock *> ExitBBs;
+
+public:
+ IteratorLoopNestScope(llvm::IRBuilderBase &Builder, unsigned Dims,
+ llvm::ArrayRef<llvm::Value *> LowerBounds,
+ llvm::ArrayRef<llvm::Value *> UpperBounds,
+ llvm::ArrayRef<llvm::Value *> Steps);
+
+ ~IteratorLoopNestScope();
+
+ llvm::ArrayRef<llvm::PHINode *> getIVs() const { return IVs; }
+};
+
} // end namespace llvm
#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8cd31fd3e207c..e183eb7dca89f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11862,3 +11862,85 @@ void CanonicalLoopInfo::invalidate() {
Latch = nullptr;
Exit = nullptr;
}
+
+IteratorLoopNestScope::IteratorLoopNestScope(
+ llvm::IRBuilderBase &Builder, unsigned Dims,
+ llvm::ArrayRef<llvm::Value *> LowerBounds,
+ llvm::ArrayRef<llvm::Value *> UpperBounds,
+ llvm::ArrayRef<llvm::Value *> Steps)
+ : B(Builder), Ctx(Builder.getContext()), Dims(Dims),
+ LowerBounds(LowerBounds), UpperBounds(UpperBounds), Steps(Steps) {
+ Function *F = B.GetInsertBlock()->getParent();
+ IVs.assign(Dims, nullptr);
+ HdrBBs.resize(Dims);
+ BodyBBs.resize(Dims);
+ LatchBBs.resize(Dims);
+ ExitBBs.resize(Dims);
+
+ llvm::BasicBlock *PreBB = B.GetInsertBlock();
+
+ for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
+ HdrBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.hdr", F);
+ BodyBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.body", F);
+ LatchBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.latch", F);
+ ExitBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.exit", F);
+ }
+
+ // Enter outermost loop.
+ B.CreateBr(HdrBBs[0]);
+
+ for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
+ // header
+ B.SetInsertPoint(HdrBBs[Dimension]);
+ IVs[Dimension] = B.CreatePHI(B.getInt64Ty(), 2, "omp.it.iv");
+ IVs[Dimension]->addIncoming(LowerBounds[Dimension],
+ (Dimension == 0) ? PreBB
+ : BodyBBs[Dimension - 1]);
+
+ llvm::Value *Cond =
+ B.CreateICmpULE(IVs[Dimension], UpperBounds[Dimension], "omp.it.cmp");
+ B.CreateCondBr(Cond, BodyBBs[Dimension], ExitBBs[Dimension]);
+
+ // body
+ B.SetInsertPoint(BodyBBs[Dimension]);
+ if (Dimension + 1 < Dims) {
+ B.CreateBr(HdrBBs[Dimension + 1]);
+ }
+
+ // iv = iv + step
+ B.SetInsertPoint(LatchBBs[Dimension]);
+ llvm::Value *Next =
+ B.CreateAdd(IVs[Dimension], Steps[Dimension], "omp.it.next");
+ IVs[Dimension]->addIncoming(Next, LatchBBs[Dimension]);
+ B.CreateBr(HdrBBs[Dimension]);
+ }
+
+ // Continue emitting the body of the innermost loop.
+ B.SetInsertPoint(BodyBBs[Dims - 1]);
+}
+
+IteratorLoopNestScope::~IteratorLoopNestScope() {
+ if (Dims == 0)
+ return;
+
+ const unsigned Last = Dims - 1;
+
+ // Fallthrough to latch if the innermost body isn't terminated by the user.
+ if (!BodyBBs[Last]->getTerminator()) {
+ llvm::IRBuilderBase::InsertPointGuard g(B);
+ B.SetInsertPoint(BodyBBs[Last]);
+ B.CreateBr(LatchBBs[Last]);
+ }
+
+ // Wire exits in reverse: exit(d) -> latch(d-1).
+ for (unsigned Dimension = Last; Dimension > 0; --Dimension) {
+ if (ExitBBs[Dimension]->getTerminator())
+ continue;
+ llvm::IRBuilderBase::InsertPointGuard g(B);
+ B.SetInsertPoint(ExitBBs[Dimension]);
+ B.CreateBr(LatchBBs[Dimension - 1]);
+ }
+
+ // Continue after the whole nest at outermost exit.
+ B.SetInsertPoint(ExitBBs[0]);
+}
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index e6242e5f06418..d90912f9f686f 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -156,6 +156,7 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
[&](omp::MapBoundsType type) -> Type { return type; });
converter.addConversion(
[&](omp::AffinityEntryType type) -> Type { return type; });
+ converter.addConversion([&](omp::IteratedType type) -> Type { return type; });
// Add conversions for all OpenMP operations.
addOpenMPOpConversions<
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4949eae218c6e..0f8d2f4becc45 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2229,6 +2229,71 @@ class TaskContextStructManager {
/// The type of the structure
llvm::Type *structTy = nullptr;
};
+
+/// Gathers the lower bounds, upper bounds and steps of an omp.iterators op as
+/// i64 LLVM values and emits, at the builder's current insertion point, the IR
+/// that computes the per-dimension and total trip counts.
+class IteratorInfo {
+private:
+ llvm::SmallVector<llvm::Value *> lowerBounds;
+ llvm::SmallVector<llvm::Value *> upperBounds;
+ llvm::SmallVector<llvm::Value *> steps;
+ llvm::SmallVector<llvm::Value *> trips;
+ unsigned dims = 0;
+ llvm::Value *totalTrips = nullptr;
+ const mlir::LLVM::ModuleTranslation &moduleTranslation;
+ llvm::IRBuilderBase &builder;
+
+ /// Returns the translated LLVM value of `val` as an i64, zero-extending or
+ /// truncating other integer widths. Returns nullptr when `val` has no
+ /// translation or is not an integer.
+ llvm::Value *lookUpAsI64(mlir::Value val) {
+ llvm::Value *v = moduleTranslation.lookupValue(val);
+ if (!v || !v->getType()->isIntegerTy())
+ return nullptr;
+ if (v->getType()->isIntegerTy(64))
+ return v;
+ return builder.CreateZExtOrTrunc(v, builder.getInt64Ty());
+ }
+
+public:
+ /// Translates the bounds of `itersOp` and emits the trip-count computation.
+ IteratorInfo(mlir::omp::IteratorsOp itersOp,
+ mlir::LLVM::ModuleTranslation &moduleTranslation,
+ llvm::IRBuilderBase &builder)
+ : moduleTranslation(moduleTranslation), builder(builder) {
+ dims = itersOp.getLbs().size();
+ lowerBounds.resize(dims);
+ upperBounds.resize(dims);
+ steps.resize(dims);
+ trips.resize(dims);
+
+ for (unsigned d = 0; d < dims; ++d) {
+ llvm::Value *lb = lookUpAsI64(itersOp.getLbs()[d]);
+ llvm::Value *ub = lookUpAsI64(itersOp.getUbs()[d]);
+ llvm::Value *st = lookUpAsI64(itersOp.getSteps()[d]);
+ assert(lb && ub && st &&
+ "Expect lowerBounds, upperBounds, and steps in IteratorsOp");
+
+ lowerBounds[d] = lb;
+ upperBounds[d] = ub;
+ steps[d] = st;
+
+ // trips = ((ub - lb) / step) + 1. The upper bound is inclusive and the
+ // division is unsigned, so this assumes ub >= lb and a positive step.
+ llvm::Value *diff = builder.CreateSub(ub, lb);
+ llvm::Value *div = builder.CreateUDiv(diff, st);
+ trips[d] = builder.CreateAdd(
+ div, llvm::ConstantInt::get(builder.getInt64Ty(), 1));
+ }
+
+ // The total trip count is the product of the per-dimension trip counts.
+ totalTrips = llvm::ConstantInt::get(builder.getInt64Ty(), 1);
+ for (unsigned d = 0; d < dims; ++d)
+ totalTrips = builder.CreateMul(totalTrips, trips[d]);
+ }
+
+ unsigned getDims() const { return dims; }
+ llvm::ArrayRef<llvm::Value *> getLowerBounds() const { return lowerBounds; }
+ llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
+ llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
+ llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
+ llvm::Value *getTotalTrips() const { return totalTrips; }
+};
+
} // namespace
void TaskContextStructManager::generateTaskContextStruct() {
@@ -2303,19 +2368,42 @@ void TaskContextStructManager::freeStructPtr() {
builder.CreateFree(structPtr);
}
+/// Writes one kmp_task_affinity_info_t record, built from `addr` and `len`,
+/// into slot `index` of `affinityList`. The flags field is always zero.
+static void storeAffinityEntry(llvm::IRBuilderBase &builder,
+ llvm::Value *affinityList, llvm::Value *index,
+ llvm::Value *addr, llvm::Value *len) {
+ llvm::LLVMContext &llvmCtx = builder.getContext();
+ llvm::Type *i64Ty = llvm::Type::getInt64Ty(llvmCtx);
+ // Record layout: { base_addr (i64), len (i64), flags (i32) }
+ llvm::StructType *recordTy =
+ llvm::StructType::get(i64Ty, i64Ty, llvm::Type::getInt32Ty(llvmCtx));
+
+ llvm::Value *slot = builder.CreateInBoundsGEP(recordTy, affinityList, index,
+ "omp.affinity.entry");
+ llvm::Value *baseAsInt = builder.CreatePtrToInt(addr, builder.getInt64Ty());
+
+ // Emit the field address and the store back to back for each field.
+ auto storeField = [&](llvm::Value *value, unsigned fieldNo) {
+ llvm::Value *fieldPtr = builder.CreateStructGEP(recordTy, slot, fieldNo);
+ builder.CreateStore(value, fieldPtr);
+ };
+ storeField(baseAsInt, 0);
+ storeField(len, 1);
+ storeField(builder.getInt32(0), 2);
+}
+
static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
llvm::OpenMPIRBuilder::AffinityData &ad) {
auto &ctx = builder.getContext();
+ SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
+ taskOp.getAffinityVars().end());
+
+ // Define the type locally just for the Alloca
llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
llvm::Type::getInt32Ty(ctx));
- SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
- taskOp.getAffinityVars().end());
-
- // Allocate [N x kmp_task_affinity_info_t]
llvm::Value *count =
llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
llvm::AllocaInst *affinityList =
@@ -2327,29 +2415,70 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
assert(addr && "expect affinity addr to be non-null");
- llvm::Value *baseAddr = builder.CreatePtrToInt(addr, builder.getInt64Ty());
llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
- llvm::Value *flags = builder.getInt32(0);
-
- llvm::Value *entry =
- builder.CreateInBoundsGEP(kmpTaskAffinityInfoTy, affinityList,
- builder.getInt64(i), "omp.affinity.entry");
-
- llvm::Value *gep0 =
- builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0); // base_addr
- llvm::Value *gep1 =
- builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1); // len
- llvm::Value *gep2 =
- builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2); // flags (i32)
-
- builder.CreateStore(baseAddr, gep0);
- builder.CreateStore(len, gep1);
- builder.CreateStore(flags, gep2);
+ storeAffinityEntry(builder, affinityList, builder.getInt64(i), addr, len);
}
+
ad.Info = affinityList;
ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
}
+/// Lowers an omp.iterators op used in a task affinity clause: allocates one
+/// kmp_task_affinity_info_t per iteration, emits a loop nest over the iteration
+/// space and stores the yielded omp.affinity_entry of every iteration into the
+/// list. On success `A` holds the list (Info) and its length as i32 (Count).
+/// Emits a diagnostic on `itersOp` and returns failure otherwise.
+static mlir::LogicalResult
+buildAffinityIterator(mlir::omp::IteratorsOp itersOp,
+ llvm::IRBuilderBase &builder,
+ mlir::LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::AffinityData &A) {
+ mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
+
+ // Validate the shape of the region before any IR is emitted, so that
+ // malformed input is diagnosed instead of dereferencing a null op.
+ auto yield =
+ mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
+ if (!yield || yield.getResults().empty())
+ return itersOp.emitOpError()
+ << "expected iterators region to end with a non-empty omp.yield";
+ auto entryOp =
+ yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
+ if (!entryOp)
+ return itersOp.emitOpError()
+ << "expected yielded value to be defined by omp.affinity_entry";
+
+ auto &ctx = builder.getContext();
+ llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+ llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+ llvm::Type::getInt32Ty(ctx));
+
+ IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+
+ // One list slot per iteration of the whole iteration space.
+ auto *list = builder.CreateAlloca(
+ kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
+
+ llvm::IteratorLoopNestScope iterLoops(
+ builder, iterInfo.getDims(), iterInfo.getLowerBounds(),
+ iterInfo.getUpperBounds(), iterInfo.getSteps());
+ auto indVars = iterLoops.getIVs();
+ // Bind the region's block arguments to the loop induction variables.
+ for (unsigned d = 0; d < iterInfo.getDims(); ++d)
+ moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), indVars[d]);
+
+ moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
+ if (mlir::failed(moduleTranslation.convertBlock(
+ iteratorRegionBlock, /*ignoreArguments=*/true, builder)))
+ return itersOp.emitOpError() << "failed to translate iterators region";
+
+ llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+ llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+ if (!addr || !len)
+ return itersOp.emitOpError()
+ << "failed to translate affinity entry address or length";
+
+ llvm::Value *linearIdx = llvm::ConstantInt::get(builder.getInt64Ty(), 0);
+ for (unsigned d = 0; d < iterInfo.getDims(); ++d) {
+ // Normalize the physical IV to a 0-based logical index for this dimension.
+ llvm::Value *logicalIdx = builder.CreateUDiv(
+ builder.CreateSub(indVars[d], iterInfo.getLowerBounds()[d]),
+ iterInfo.getSteps()[d]);
+ // Row-major flattening: linear = linear * Trips[d] + logicalIdx
+ linearIdx = builder.CreateAdd(
+ builder.CreateMul(linearIdx, iterInfo.getTrips()[d]), logicalIdx);
+ }
+
+ storeAffinityEntry(builder, list, linearIdx, addr, len);
+
+ moduleTranslation.forgetMapping(itersOp.getRegion());
+
+ A.Info = list;
+ A.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+ return mlir::success();
+}
+
/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2565,6 +2694,13 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
if (!taskOp.getAffinityVars().empty())
buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+ else if (!taskOp.getIterated().empty()) {
+ for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
+ auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+ if (failed(buildAffinityIterator(iterOp, builder, moduleTranslation, ad)))
+ return failure();
+ }
+ }
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
@@ -7336,7 +7472,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
return convertOmpLoopNest(*op, builder, moduleTranslation);
})
.Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
- omp::AffinityEntryOp>([&](auto op) {
+ omp::AffinityEntryOp, omp::IteratorsOp>([&](auto op) {
// No-op, should be handled by relevant owning operations e.g.
// TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
// etc. and then discarded
>From 95e235a29b89133903bf890c4815524927afab8a Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 15:20:55 -0600
Subject: [PATCH 05/11] Create 1-dim canonical loop for omp.iterators
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 27 ----
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 82 ----------
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 142 +++++++++++-------
3 files changed, 91 insertions(+), 160 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0332930eafedf..deb2535ac843f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4293,33 +4293,6 @@ class ScanInfo {
~ScanInfo() { delete (ScanBuffPtrs); }
};
-class IteratorLoopNestScope {
-private:
- llvm::IRBuilderBase &B;
- llvm::LLVMContext &Ctx;
-
- unsigned Dims = 0;
- llvm::ArrayRef<llvm::Value *> LowerBounds;
- llvm::ArrayRef<llvm::Value *> UpperBounds;
- llvm::ArrayRef<llvm::Value *> Steps;
-
- llvm::SmallVector<llvm::PHINode *> IVs;
- llvm::SmallVector<llvm::BasicBlock *> HdrBBs;
- llvm::SmallVector<llvm::BasicBlock *> BodyBBs;
- llvm::SmallVector<llvm::BasicBlock *> LatchBBs;
- llvm::SmallVector<llvm::BasicBlock *> ExitBBs;
-
-public:
- IteratorLoopNestScope(llvm::IRBuilderBase &Builder, unsigned Dims,
- llvm::ArrayRef<llvm::Value *> LowerBounds,
- llvm::ArrayRef<llvm::Value *> UpperBounds,
- llvm::ArrayRef<llvm::Value *> Steps);
-
- ~IteratorLoopNestScope();
-
- llvm::ArrayRef<llvm::PHINode *> getIVs() const { return IVs; }
-};
-
} // end namespace llvm
#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index e183eb7dca89f..8cd31fd3e207c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11862,85 +11862,3 @@ void CanonicalLoopInfo::invalidate() {
Latch = nullptr;
Exit = nullptr;
}
-
-IteratorLoopNestScope::IteratorLoopNestScope(
- llvm::IRBuilderBase &Builder, unsigned Dims,
- llvm::ArrayRef<llvm::Value *> LowerBounds,
- llvm::ArrayRef<llvm::Value *> UpperBounds,
- llvm::ArrayRef<llvm::Value *> Steps)
- : B(Builder), Ctx(Builder.getContext()), Dims(Dims),
- LowerBounds(LowerBounds), UpperBounds(UpperBounds), Steps(Steps) {
- Function *F = B.GetInsertBlock()->getParent();
- IVs.assign(Dims, nullptr);
- HdrBBs.resize(Dims);
- BodyBBs.resize(Dims);
- LatchBBs.resize(Dims);
- ExitBBs.resize(Dims);
-
- llvm::BasicBlock *PreBB = B.GetInsertBlock();
-
- for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
- HdrBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.hdr", F);
- BodyBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.body", F);
- LatchBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.latch", F);
- ExitBBs[Dimension] = llvm::BasicBlock::Create(Ctx, "omp.iter.exit", F);
- }
-
- // Enter outermost loop.
- B.CreateBr(HdrBBs[0]);
-
- for (unsigned Dimension = 0; Dimension < Dims; ++Dimension) {
- // header
- B.SetInsertPoint(HdrBBs[Dimension]);
- IVs[Dimension] = B.CreatePHI(B.getInt64Ty(), 2, "omp.it.iv");
- IVs[Dimension]->addIncoming(LowerBounds[Dimension],
- (Dimension == 0) ? PreBB
- : BodyBBs[Dimension - 1]);
-
- llvm::Value *Cond =
- B.CreateICmpULE(IVs[Dimension], UpperBounds[Dimension], "omp.it.cmp");
- B.CreateCondBr(Cond, BodyBBs[Dimension], ExitBBs[Dimension]);
-
- // body
- B.SetInsertPoint(BodyBBs[Dimension]);
- if (Dimension + 1 < Dims) {
- B.CreateBr(HdrBBs[Dimension + 1]);
- }
-
- // iv = iv + step
- B.SetInsertPoint(LatchBBs[Dimension]);
- llvm::Value *Next =
- B.CreateAdd(IVs[Dimension], Steps[Dimension], "omp.it.next");
- IVs[Dimension]->addIncoming(Next, LatchBBs[Dimension]);
- B.CreateBr(HdrBBs[Dimension]);
- }
-
- // Continue emitting the body of the innermost loop.
- B.SetInsertPoint(BodyBBs[Dims - 1]);
-}
-
-IteratorLoopNestScope::~IteratorLoopNestScope() {
- if (Dims == 0)
- return;
-
- const unsigned Last = Dims - 1;
-
- // Fallthrough to latch if the innermost body isn't terminated by the user.
- if (!BodyBBs[Last]->getTerminator()) {
- llvm::IRBuilderBase::InsertPointGuard g(B);
- B.SetInsertPoint(BodyBBs[Last]);
- B.CreateBr(LatchBBs[Last]);
- }
-
- // Wire exits in reverse: exit(d) -> latch(d-1).
- for (unsigned Dimension = Last; Dimension > 0; --Dimension) {
- if (ExitBBs[Dimension]->getTerminator())
- continue;
- llvm::IRBuilderBase::InsertPointGuard g(B);
- B.SetInsertPoint(ExitBBs[Dimension]);
- B.CreateBr(LatchBBs[Dimension - 1]);
- }
-
- // Continue after the whole nest at outermost exit.
- B.SetInsertPoint(ExitBBs[0]);
-}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0f8d2f4becc45..97d96660405d0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2238,11 +2238,10 @@ class IteratorInfo {
llvm::SmallVector<llvm::Value *> trips;
unsigned dims;
llvm::Value *totalTrips;
- const mlir::LLVM::ModuleTranslation &moduleTranslation;
- llvm::IRBuilderBase &builder;
- llvm::Value *lookUpAsI64(mlir::Value val) {
- llvm::Value *v = moduleTranslation.lookupValue(val);
+ llvm::Value *lookUpAsI64(mlir::Value val, const LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase &builder) {
+ llvm::Value *v = mt.lookupValue(val);
if (!v)
return nullptr;
if (v->getType()->isIntegerTy(64))
@@ -2255,8 +2254,7 @@ class IteratorInfo {
public:
IteratorInfo(mlir::omp::IteratorsOp itersOp,
mlir::LLVM::ModuleTranslation &moduleTranslation,
- llvm::IRBuilderBase &builder)
- : moduleTranslation(moduleTranslation), builder(builder) {
+ llvm::IRBuilderBase &builder) {
dims = itersOp.getLbs().size();
this->lowerBounds.resize(dims);
this->upperBounds.resize(dims);
@@ -2264,9 +2262,12 @@ class IteratorInfo {
this->trips.resize(dims);
for (unsigned d = 0; d < dims; ++d) {
- llvm::Value *lb = lookUpAsI64(itersOp.getLbs()[d]);
- llvm::Value *ub = lookUpAsI64(itersOp.getUbs()[d]);
- llvm::Value *st = lookUpAsI64(itersOp.getSteps()[d]);
+ llvm::Value *lb =
+ lookUpAsI64(itersOp.getLbs()[d], moduleTranslation, builder);
+ llvm::Value *ub =
+ lookUpAsI64(itersOp.getUbs()[d], moduleTranslation, builder);
+ llvm::Value *st =
+ lookUpAsI64(itersOp.getSteps()[d], moduleTranslation, builder);
assert(lb && ub && st &&
"Expect lowerBounds, upperBounds, and steps in IteratorsOp");
@@ -2291,7 +2292,7 @@ class IteratorInfo {
llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
- llvm::Value *getTotalTrips() { return totalTrips; }
+ llvm::Value *getTotalTrips() const { return totalTrips; }
};
} // namespace
@@ -2424,58 +2425,96 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
}
static mlir::LogicalResult
-buildAffinityIterator(mlir::omp::IteratorsOp itersOp,
- llvm::IRBuilderBase &builder,
- mlir::LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::AffinityData &A) {
+buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
+ llvm::IRBuilderBase &builder,
+ mlir::LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::AffinityData &ad) {
+
auto &ctx = builder.getContext();
+ auto &ompBuilder = *moduleTranslation.getOpenMPBuilder();
+ IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+
llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
llvm::Type::getInt32Ty(ctx));
+ auto *list = builder.CreateAlloca(
+ kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
- IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
- auto *list = builder.CreateAlloca(
- kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
+ llvm::Function *F = builder.GetInsertBlock()->getParent();
+ llvm::BasicBlock *curBB = builder.GetInsertBlock();
+ llvm::Instruction *splitPt = (builder.GetInsertPoint() == curBB->end())
+ ? curBB->getTerminator()
+ : &*builder.GetInsertPoint();
+ if (!splitPt) {
+ llvm::BasicBlock *tmp = llvm::BasicBlock::Create(ctx, "omp.tmp.cont", F);
+ builder.SetInsertPoint(curBB);
+ builder.CreateBr(tmp);
+ splitPt = curBB->getTerminator();
+ }
- llvm::IteratorLoopNestScope iterLoops(
- builder, iterInfo.getDims(), iterInfo.getLowerBounds(),
- iterInfo.getUpperBounds(), iterInfo.getSteps());
- auto indVars = iterLoops.getIVs();
- for (unsigned d = 0; d < iterInfo.getDims(); ++d)
- moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), indVars[d]);
+ llvm::BasicBlock *contBB = curBB->splitBasicBlock(splitPt, "omp.task.cont");
+ // Remove the branch to contBB since we will branch to contBB after the loop
+ curBB->getTerminator()->eraseFromParent();
+
+ auto *cli = ompBuilder.createLoopSkeleton(
+ builder.getCurrentDebugLocation(), iterInfo.getTotalTrips(),
+ builder.GetInsertBlock()->getParent(), contBB, contBB);
+ builder.SetInsertPoint(curBB);
+ builder.CreateBr(cli->getPreheader());
+
+ // Remove the unconditional branch inserted by createLoopSkeleton in the body
+ if (llvm::Instruction *T = cli->getBody()->getTerminator())
+ T->eraseFromParent();
+
+ // Start building the loop body
+ builder.SetInsertPoint(cli->getBody());
+
+ llvm::Value *linearIV = cli->getIndVar();
+ for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
+ llvm::Value *trip = iterInfo.getTrips()[d];
+ // idx = linearIV % trips[d]
+ llvm::Value *idx = builder.CreateURem(linearIV, trip);
+ // linearIV = linearIV / trips[d]
+ linearIV = builder.CreateUDiv(linearIV, trip);
+
+ // physicalIV = lb + logical * step.
+ llvm::Value *physicalIV = builder.CreateAdd(
+ iterInfo.getLowerBounds()[d],
+ builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv");
+
+ moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physicalIV);
+ }
moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
- if (mlir::failed(moduleTranslation.convertBlock(
- iteratorRegionBlock, /*ignoreArguments=*/true, builder)))
+ if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
+ /*ignoreArguments=*/true,
+ builder))) {
return itersOp.emitOpError() << "failed to translate iterators region";
+ }
auto yield =
mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
auto entryOp =
yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
-
llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+ storeAffinityEntry(builder, list, cli->getIndVar(), addr, len);
- llvm::Value *linearIdx = llvm::ConstantInt::get(builder.getInt64Ty(), 0);
- for (unsigned d = 0; d < iterInfo.getDims(); ++d) {
- // Normalize the physical IV to a 0-based logical index for this dimension.
- llvm::Value *logicalIdx = builder.CreateUDiv(
- builder.CreateSub(indVars[d], iterInfo.getLowerBounds()[d]),
- iterInfo.getSteps()[d]);
- // Row-major flattening: linear = linear * Trips[d] + logicalIdx
- linearIdx = builder.CreateAdd(
- builder.CreateMul(linearIdx, iterInfo.getTrips()[d]), logicalIdx);
- }
-
- storeAffinityEntry(builder, list, linearIdx, addr, len);
+ // Ensure we end the loop body by jumping to the latch
+ if (!builder.GetInsertBlock()->getTerminator())
+ builder.CreateBr(cli->getLatch());
moduleTranslation.forgetMapping(itersOp.getRegion());
- A.Info = list;
- A.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+ builder.SetInsertPoint(cli->getAfter(), cli->getAfter()->begin());
+ builder.CreateBr(contBB);
+ builder.SetInsertPoint(contBB, contBB->begin());
+
+ ad.Info = list;
+ ad.Count =
+ builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
return mlir::success();
}
@@ -2593,6 +2632,18 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
taskOp.getPrivateNeedsBarrier())))
return llvm::failure();
+ llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
+ if (!taskOp.getAffinityVars().empty())
+ buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+ else if (!taskOp.getIterated().empty()) {
+ for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
+ auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+ if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
+ moduleTranslation, ad)))
+ return llvm::failure();
+ }
+ }
+
// Set up for call to createTask()
builder.SetInsertPoint(taskStartBlock);
@@ -2691,17 +2742,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
moduleTranslation, dds);
- llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
- if (!taskOp.getAffinityVars().empty())
- buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
- else if (!taskOp.getIterated().empty()) {
- for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
- auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
- if (failed(buildAffinityIterator(iterOp, builder, moduleTranslation, ad)))
- return failure();
- }
- }
-
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
moduleTranslation.getOpenMPBuilder()->createTask(
>From 9690a796c46e7135765fe64365ad27f2d0b0f689 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 16:23:42 -0600
Subject: [PATCH 06/11] Support multiple affinity registrations for a task
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 2 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 28 ++++++-------
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 42 +++++++++++++------
3 files changed, 42 insertions(+), 30 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index deb2535ac843f..1c421f0dfce66 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1583,7 +1583,7 @@ class OpenMPIRBuilder {
const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr,
Value *IfCondition = nullptr, SmallVector<DependData> Dependencies = {},
- AffinityData Affinity = {}, bool Mergeable = false,
+ SmallVector<AffinityData> Affinities = {}, bool Mergeable = false,
Value *EventHandle = nullptr, Value *Priority = nullptr);
/// Generator for the taskgroup construct
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8cd31fd3e207c..397de485473ac 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2434,8 +2434,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
- SmallVector<DependData> Dependencies, AffinityData Affinity, bool Mergeable,
- Value *EventHandle, Value *Priority) {
+ SmallVector<DependData> Dependencies, SmallVector<AffinityData> Affinities,
+ bool Mergeable, Value *EventHandle, Value *Priority) {
if (!updateToLocation(Loc))
return InsertPointTy();
@@ -2481,8 +2481,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
- Affinity, Mergeable, Priority, EventHandle, TaskAllocaBB,
- ToBeDeleted](Function &OutlinedFn) mutable {
+ Affinities, Mergeable, Priority, EventHandle,
+ TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable {
// Replace the Stale CI by appropriate RTL function call.
assert(OutlinedFn.hasOneUse() &&
"there must be a single user for the outlined function");
@@ -2555,20 +2555,16 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
/*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
/*task_func=*/&OutlinedFn});
- if (Affinity.Count && Affinity.Info) {
+ if (!Affinities.empty()) {
Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
OMPRTL___kmpc_omp_reg_task_with_affinity);
-
- // bitcast to i8*
- Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- Affinity.Info, Builder.getPtrTy(0));
-
- if (!Affinity.Count->getType()->isIntegerTy(32))
- Affinity.Count =
- Builder.CreateTruncOrBitCast(Affinity.Count, Builder.getInt32Ty());
-
- createRuntimeFunctionCall(
- RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+ for (const auto &Affinity : Affinities) {
+ // bitcast to i8*
+ Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Affinity.Info, Builder.getPtrTy(0));
+ createRuntimeFunctionCall(
+ RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+ }
}
// Emit detach clause initialization.
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 97d96660405d0..4e521c5496413 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2460,7 +2460,7 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
auto *cli = ompBuilder.createLoopSkeleton(
builder.getCurrentDebugLocation(), iterInfo.getTotalTrips(),
- builder.GetInsertBlock()->getParent(), contBB, contBB);
+ builder.GetInsertBlock()->getParent(), contBB, contBB, "iterator");
builder.SetInsertPoint(curBB);
builder.CreateBr(cli->getPreheader());
@@ -2518,6 +2518,30 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
return mlir::success();
}
+static mlir::LogicalResult buildAffinityData(
+ mlir::omp::TaskOp &taskOp, llvm::IRBuilderBase &builder,
+ mlir::LLVM::ModuleTranslation &moduleTranslation,
+ llvm::SmallVectorImpl<llvm::OpenMPIRBuilder::AffinityData> &ads) {
+ using AffinityData = llvm::OpenMPIRBuilder::AffinityData;
+
+ if (!taskOp.getAffinityVars().empty()) {
+ AffinityData ad;
+ buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
+ ads.emplace_back(ad);
+ }
+ if (!taskOp.getIterated().empty()) {
+ for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
+ auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+ AffinityData ad;
+ if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
+ moduleTranslation, ad)))
+ return llvm::failure();
+ ads.emplace_back(ad);
+ }
+ }
+ return mlir::success();
+}
+
/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2632,17 +2656,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
taskOp.getPrivateNeedsBarrier())))
return llvm::failure();
- llvm::OpenMPIRBuilder::AffinityData ad = {nullptr, nullptr};
- if (!taskOp.getAffinityVars().empty())
- buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
- else if (!taskOp.getIterated().empty()) {
- for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
- auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
- if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
- moduleTranslation, ad)))
- return llvm::failure();
- }
- }
+ llvm::SmallVector<llvm::OpenMPIRBuilder::AffinityData> ads;
+ if (failed(buildAffinityData(taskOp, builder, moduleTranslation, ads)))
+ return llvm::failure();
// Set up for call to createTask()
builder.SetInsertPoint(taskStartBlock);
@@ -2747,7 +2763,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
moduleTranslation.getOpenMPBuilder()->createTask(
ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
moduleTranslation.lookupValue(taskOp.getFinal()),
- moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad,
+ moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ads,
taskOp.getMergeable(),
moduleTranslation.lookupValue(taskOp.getEventHandle()),
moduleTranslation.lookupValue(taskOp.getPriority()));
>From 1338ac6c0d4f2d5fd1d4aac7b4ce4aab1d43e817 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 17:42:18 -0600
Subject: [PATCH 07/11] Move iterator loop generation logic to OMPIRBuilder
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 7 +
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 61 ++++++++
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 140 +++++++++---------
3 files changed, 135 insertions(+), 73 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 1c421f0dfce66..a85b34eaee4ce 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3932,6 +3932,13 @@ class OpenMPIRBuilder {
LLVM_ABI GlobalVariable *
getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
std::optional<unsigned> AddressSpace = {});
+
+ using IteratorBodyGenTy = llvm::function_ref<llvm::Error(
+ InsertPointTy BodyIP, llvm::Value *LinearIV)>;
+
+ LLVM_ABI InsertPointOrErrorTy createIteratorLoop(
+ LocationDescription Loc, llvm::Value *TripCount,
+ IteratorBodyGenTy BodyGen, llvm::StringRef Name = "iterator");
};
/// Class to represented the control flow structure of an OpenMP canonical loop.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 397de485473ac..a7fe01375556e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -11546,6 +11546,67 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(vfs::FileSystem &VFS,
loadOffloadInfoMetadata(*M.get());
}
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createIteratorLoop(
+ LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen,
+ llvm::StringRef Name) {
+ IRBuilderBase &B = Builder;
+ B.restoreIP(Loc.IP);
+
+ BasicBlock *CurBB = Builder.GetInsertBlock();
+ assert(CurBB &&
+ "expected a valid insertion block for creating an iterator loop");
+ Function *F = CurBB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // If splitting at end() but CurBB has no terminator, make it well-formed
+ // first. This happens in some pipelines where blocks are still under
+ // construction.
+ if (B.GetInsertPoint() == CurBB->end() && !CurBB->getTerminator()) {
+ BasicBlock *TmpCont = BasicBlock::Create(Ctx, "omp.it.tmp.cont", F);
+ B.SetInsertPoint(CurBB);
+ B.CreateBr(TmpCont);
+
+ // The terminator we just inserted is now the "end" of CurBB. To split after
+ // it, set insertion point to CurBB->end() (which is fine now).
+ B.SetInsertPoint(CurBB->end());
+ }
+
+ BasicBlock *ContBB =
+ CurBB->splitBasicBlock(Builder.GetInsertPoint(), "omp.it.cont");
+ // Remove the branch to contBB since we will branch to contBB after the loop
+ CurBB->getTerminator()->eraseFromParent();
+
+ CanonicalLoopInfo *CLI =
+ createLoopSkeleton(B.getCurrentDebugLocation(), TripCount, F,
+ /*PreInsertBefore=*/ContBB,
+ /*PostInsertBefore=*/ContBB, Name);
+
+ // Enter loop from original block.
+ B.SetInsertPoint(CurBB);
+ B.CreateBr(CLI->getPreheader());
+
+ // Remove the unconditional branch inserted by createLoopSkeleton in the body
+ if (Instruction *T = CLI->getBody()->getTerminator())
+ T->eraseFromParent();
+
+ InsertPointTy BodyIP = CLI->getBodyIP();
+ if (llvm::Error Err = BodyGen(BodyIP, CLI->getIndVar()))
+ return Err;
+
+ // Ensure we end the loop body by jumping to the latch
+ if (!CLI->getBody()->getTerminator()) {
+ B.SetInsertPoint(CLI->getBody());
+ B.CreateBr(CLI->getLatch());
+ }
+
+ // Link After -> ContBB
+ B.SetInsertPoint(CLI->getAfter(), CLI->getAfter()->begin());
+ if (!CLI->getAfter()->getTerminator())
+ B.CreateBr(ContBB);
+
+ return InsertPointTy{ContBB, ContBB->begin()};
+}
+
//===----------------------------------------------------------------------===//
// OffloadEntriesInfoManager
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4e521c5496413..1cf040db00163 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2429,92 +2429,86 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
llvm::IRBuilderBase &builder,
mlir::LLVM::ModuleTranslation &moduleTranslation,
llvm::OpenMPIRBuilder::AffinityData &ad) {
-
auto &ctx = builder.getContext();
- auto &ompBuilder = *moduleTranslation.getOpenMPBuilder();
+ auto *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ if (!ompBuilder)
+ return itersOp.emitOpError() << "missing OpenMPIRBuilder";
+
IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
llvm::Type::getInt32Ty(ctx));
- auto *list = builder.CreateAlloca(
- kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(), "omp.affinity_list");
-
- mlir::Block &iteratorRegionBlock = itersOp.getRegion().front();
-
- llvm::Function *F = builder.GetInsertBlock()->getParent();
- llvm::BasicBlock *curBB = builder.GetInsertBlock();
- llvm::Instruction *splitPt = (builder.GetInsertPoint() == curBB->end())
- ? curBB->getTerminator()
- : &*builder.GetInsertPoint();
- if (!splitPt) {
- llvm::BasicBlock *tmp = llvm::BasicBlock::Create(ctx, "omp.tmp.cont", F);
- builder.SetInsertPoint(curBB);
- builder.CreateBr(tmp);
- splitPt = curBB->getTerminator();
- }
- llvm::BasicBlock *contBB = curBB->splitBasicBlock(splitPt, "omp.task.cont");
- // Remove the branch to contBB since we will branch to contBB after the loop
- curBB->getTerminator()->eraseFromParent();
-
- auto *cli = ompBuilder.createLoopSkeleton(
- builder.getCurrentDebugLocation(), iterInfo.getTotalTrips(),
- builder.GetInsertBlock()->getParent(), contBB, contBB, "iterator");
- builder.SetInsertPoint(curBB);
- builder.CreateBr(cli->getPreheader());
-
- // Remove the unconditional branch inserted by createLoopSkeleton in the body
- if (llvm::Instruction *T = cli->getBody()->getTerminator())
- T->eraseFromParent();
-
- // Start building the loop body
- builder.SetInsertPoint(cli->getBody());
-
- llvm::Value *linearIV = cli->getIndVar();
- for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
- llvm::Value *trip = iterInfo.getTrips()[d];
- // idx = linearIV % trips[d]
- llvm::Value *idx = builder.CreateURem(linearIV, trip);
- // linearIV = linearIV / trips[d]
- linearIV = builder.CreateUDiv(linearIV, trip);
-
- // physicalIV = lb + logical * step.
- llvm::Value *physicalIV = builder.CreateAdd(
- iterInfo.getLowerBounds()[d],
- builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv");
-
- moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physicalIV);
- }
+ auto *list = builder.CreateAlloca(kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(),
+ "omp.affinity_list");
- moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
- if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
- /*ignoreArguments=*/true,
- builder))) {
- return itersOp.emitOpError() << "failed to translate iterators region";
- }
+ mlir::Region &itersRegion = itersOp.getRegion();
+ mlir::Block &iteratorRegionBlock = itersRegion.front();
- auto yield =
- mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
- auto entryOp =
- yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
- llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
- llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
- storeAffinityEntry(builder, list, cli->getIndVar(), addr, len);
+ llvm::OpenMPIRBuilder::LocationDescription loc(builder);
- // Ensure we end the loop body by jumping to the latch
- if (!builder.GetInsertBlock()->getTerminator())
- builder.CreateBr(cli->getLatch());
+ // Build the iterator loop using the new OMPIRBuilder helper.
+ auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy bodyIP,
+ llvm::Value *linearIV) -> llvm::Error {
+ llvm::IRBuilderBase::InsertPointGuard g(builder);
+ builder.restoreIP(bodyIP);
+
+ // Unflatten linearIV into per-dimension logical indices (row-major).
+ llvm::Value *tmp = linearIV;
+ for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
+ llvm::Value *trip = iterInfo.getTrips()[d];
+ // idx_d = tmp % trip_d
+ llvm::Value *idx = builder.CreateURem(tmp, trip, "omp.it.idx");
+ // tmp = tmp / trip_d
+ tmp = builder.CreateUDiv(tmp, trip, "omp.it.lin.next");
+
+ // physIV_d = lb_d + idx_d * step_d
+ llvm::Value *physIV = builder.CreateAdd(
+ iterInfo.getLowerBounds()[d],
+ builder.CreateMul(idx, iterInfo.getSteps()[d]),
+ "omp.it.phys_iv");
+
+ moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV);
+ }
- moduleTranslation.forgetMapping(itersOp.getRegion());
+ // Translate the iterator region into the loop body.
+ moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
+ if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
+ /*ignoreArguments=*/true,
+ builder))) {
+ return llvm::make_error<llvm::StringError>(
+ "failed to translate iterators region",
+ llvm::inconvertibleErrorCode());
+ }
- builder.SetInsertPoint(cli->getAfter(), cli->getAfter()->begin());
- builder.CreateBr(contBB);
- builder.SetInsertPoint(contBB, contBB->begin());
+ // Extract affinity entry from omp.yield and store into list[linearIV].
+ auto yield = mlir::dyn_cast<mlir::omp::YieldOp>(
+ iteratorRegionBlock.getTerminator());
+ auto entryOp =
+ yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
+
+ llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+ llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+
+ storeAffinityEntry(builder, list, linearIV, addr, len);
+
+ // Avoid leaking region mappings if this iterator loop is reused/expanded.
+ moduleTranslation.forgetMapping(itersRegion);
+
+ return llvm::Error::success();
+ };
+
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+ ompBuilder->createIteratorLoop(loc, iterInfo.getTotalTrips(), bodyGen,
+ /*Name=*/"iterator");
+ if (!afterIP)
+ return itersOp.emitOpError() << llvm::toString(afterIP.takeError());
+
+ builder.restoreIP(*afterIP);
- ad.Info = list;
- ad.Count =
- builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
+ ad.Info = list;
+ ad.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
return mlir::success();
}
>From f641cad62c56c16e4908cdd9deb4d377949f0ee4 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 17:51:09 -0600
Subject: [PATCH 08/11] Fix tests
---
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 8 ++++----
mlir/test/Target/LLVMIR/openmp-todo.mlir | 12 ------------
2 files changed, 4 insertions(+), 16 deletions(-)
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 11f03d2a06d6e..8292b055f6cd9 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -7586,9 +7586,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
Builder.CreateStore(Builder.getInt32(0),
Builder.CreateStructGEP(AffInfoTy, Entry0, 2));
- OpenMPIRBuilder::AffinityData Affinity;
- Affinity.Count = CountI32;
- Affinity.Info = AffArr;
+ SmallVector<OpenMPIRBuilder::AffinityData> Affinities;
+ OpenMPIRBuilder::AffinityData Affinity{CountI32, AffArr};
+ Affinities.push_back(Affinity);
BasicBlock *AllocaBB = Builder.GetInsertBlock();
BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
@@ -7604,7 +7604,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
/*Final=*/nullptr,
/*IfCondition=*/nullptr,
/*Dependencies=*/{},
- /*Affinity=*/Affinity,
+ /*Affinity=*/Affinities,
/*Mergeable=*/false,
/*EventHandle=*/nullptr,
/*Priority=*/nullptr));
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index ae02b5878f763..c0f43f27e6b0f 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -473,15 +473,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
}
llvm.return
}
-
-// -----
-llvm.func @task_affinity(%ptr : !llvm.ptr, %len : i64) {
- // expected-error@below {{not yet implemented: omp.affinity_entry}}
- // expected-error@below {{LLVM Translation failed for operation: omp.affinity_entry}}
- %ae = omp.affinity_entry %ptr, %len
- : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
- omp.task affinity(%ae : !omp.affinity_entry_ty<!llvm.ptr, i64>) {
- omp.terminator
- }
- llvm.return
-}
>From f1bde7aebc6a33fc41b5086887938270d3ee3cf7 Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 18:13:47 -0600
Subject: [PATCH 09/11] Extract iterator loop body conversion logic
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 64 +++++++++++--------
1 file changed, 37 insertions(+), 27 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1cf040db00163..78ea31a626033 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2424,6 +2424,37 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
}
+static mlir::LogicalResult
+convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
+ mlir::Block &iteratorRegionBlock,
+ llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::Value *tmp = linearIV;
+ for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
+ llvm::Value *trip = iterInfo.getTrips()[d];
+ // idx_d = tmp % trip_d
+ llvm::Value *idx = builder.CreateURem(tmp, trip, "omp.it.idx");
+ // tmp = tmp / trip_d
+ tmp = builder.CreateUDiv(tmp, trip, "omp.it.lin.next");
+
+ // physIV_d = lb_d + idx_d * step_d
+ llvm::Value *physIV = builder.CreateAdd(
+ iterInfo.getLowerBounds()[d],
+ builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv");
+
+ moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV);
+ }
+
+ // Translate the iterator region into the loop body.
+ moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
+ if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
+ /*ignoreArguments=*/true,
+ builder))) {
+ return mlir::failure();
+ }
+ return mlir::success();
+}
+
static mlir::LogicalResult
buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
llvm::IRBuilderBase &builder,
@@ -2454,43 +2485,22 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
llvm::IRBuilderBase::InsertPointGuard g(builder);
builder.restoreIP(bodyIP);
- // Unflatten linearIV into per-dimension logical indices (row-major).
- llvm::Value *tmp = linearIV;
- for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
- llvm::Value *trip = iterInfo.getTrips()[d];
- // idx_d = tmp % trip_d
- llvm::Value *idx = builder.CreateURem(tmp, trip, "omp.it.idx");
- // tmp = tmp / trip_d
- tmp = builder.CreateUDiv(tmp, trip, "omp.it.lin.next");
-
- // physIV_d = lb_d + idx_d * step_d
- llvm::Value *physIV = builder.CreateAdd(
- iterInfo.getLowerBounds()[d],
- builder.CreateMul(idx, iterInfo.getSteps()[d]),
- "omp.it.phys_iv");
-
- moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV);
- }
-
- // Translate the iterator region into the loop body.
- moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
- if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock,
- /*ignoreArguments=*/true,
- builder))) {
+ if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock,
+ builder, moduleTranslation))) {
return llvm::make_error<llvm::StringError>(
- "failed to translate iterators region",
- llvm::inconvertibleErrorCode());
+ "failed to convert iterators region", llvm::inconvertibleErrorCode());
}
// Extract affinity entry from omp.yield and store into list[linearIV].
auto yield = mlir::dyn_cast<mlir::omp::YieldOp>(
iteratorRegionBlock.getTerminator());
+ assert(yield.getResults().size() == 1 &&
+ "expect omp.yield in iterator region to have one result");
auto entryOp =
yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
- llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
-
+ llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
storeAffinityEntry(builder, list, linearIV, addr, len);
// Avoid leaking region mappings if this iterator loop is reused/expanded.
>From 0d477947c2bbed4206325c81ca25bf5bd615e2fe Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Fri, 20 Feb 2026 19:24:31 -0600
Subject: [PATCH 10/11] Refactor buildAffinityData by hoisting the creation of
affinity_list
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 7 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 101 ++++++++----------
2 files changed, 49 insertions(+), 59 deletions(-)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index a7fe01375556e..229f5627b7552 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2559,11 +2559,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
OMPRTL___kmpc_omp_reg_task_with_affinity);
for (const auto &Affinity : Affinities) {
- // bitcast to i8*
- Value *AffPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- Affinity.Info, Builder.getPtrTy(0));
- createRuntimeFunctionCall(
- RegAffFn, {Ident, ThreadID, TaskData, Affinity.Count, AffPtr});
+ createRuntimeFunctionCall(RegAffFn, {Ident, ThreadID, TaskData,
+ Affinity.Count, Affinity.Info});
}
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 78ea31a626033..aa4fa2c9ca284 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2392,24 +2392,13 @@ static void storeAffinityEntry(llvm::IRBuilderBase &builder,
builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2));
}
-static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
- llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::AffinityData &ad) {
- auto &ctx = builder.getContext();
+static void fillAffinityLocators(mlir::omp::TaskOp taskOp,
+ llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::Value *affinityList) {
SmallVector<mlir::Value> affinityVars(taskOp.getAffinityVars().begin(),
taskOp.getAffinityVars().end());
- // Define the type locally just for the Alloca
- llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
- llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
- llvm::Type::getInt32Ty(ctx));
-
- llvm::Value *count =
- llvm::ConstantInt::get(builder.getInt64Ty(), affinityVars.size());
- llvm::AllocaInst *affinityList =
- builder.CreateAlloca(kmpTaskAffinityInfoTy, count, "omp.affinity_list");
-
for (unsigned i = 0; i < affinityVars.size(); ++i) {
auto entryOp = affinityVars[i].getDefiningOp<mlir::omp::AffinityEntryOp>();
assert(entryOp && "affinity item must be omp.affinity_entry");
@@ -2419,9 +2408,6 @@ static void buildTaskAffinityList(mlir::omp::TaskOp taskOp,
llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
storeAffinityEntry(builder, affinityList, builder.getInt64(i), addr, len);
}
-
- ad.Info = affinityList;
- ad.Count = builder.getInt32(static_cast<uint32_t>(affinityVars.size()));
}
static mlir::LogicalResult
@@ -2433,9 +2419,9 @@ convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) {
llvm::Value *trip = iterInfo.getTrips()[d];
// idx_d = tmp % trip_d
- llvm::Value *idx = builder.CreateURem(tmp, trip, "omp.it.idx");
+ llvm::Value *idx = builder.CreateURem(tmp, trip);
// tmp = tmp / trip_d
- tmp = builder.CreateUDiv(tmp, trip, "omp.it.lin.next");
+ tmp = builder.CreateUDiv(tmp, trip);
// physIV_d = lb_d + idx_d * step_d
llvm::Value *physIV = builder.CreateAdd(
@@ -2456,24 +2442,10 @@ convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
}
static mlir::LogicalResult
-buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
- llvm::IRBuilderBase &builder,
- mlir::LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::AffinityData &ad) {
- auto &ctx = builder.getContext();
- auto *ompBuilder = moduleTranslation.getOpenMPBuilder();
- if (!ompBuilder)
- return itersOp.emitOpError() << "missing OpenMPIRBuilder";
-
- IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
-
- llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
- llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
- llvm::Type::getInt32Ty(ctx));
-
- auto *list = builder.CreateAlloca(kmpTaskAffinityInfoTy, iterInfo.getTotalTrips(),
- "omp.affinity_list");
-
+fillAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
+ llvm::IRBuilderBase &builder,
+ mlir::LLVM::ModuleTranslation &moduleTranslation,
+ llvm::Value *affinityList, IteratorInfo &iterInfo) {
mlir::Region &itersRegion = itersOp.getRegion();
mlir::Block &iteratorRegionBlock = itersRegion.front();
@@ -2492,8 +2464,8 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
}
// Extract affinity entry from omp.yield and store into list[linearIV].
- auto yield = mlir::dyn_cast<mlir::omp::YieldOp>(
- iteratorRegionBlock.getTerminator());
+ auto yield =
+ mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
assert(yield.getResults().size() == 1 &&
"expect omp.yield in iterator region to have one result");
auto entryOp =
@@ -2501,7 +2473,7 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
- storeAffinityEntry(builder, list, linearIV, addr, len);
+ storeAffinityEntry(builder, affinityList, linearIV, addr, len);
// Avoid leaking region mappings if this iterator loop is reused/expanded.
moduleTranslation.forgetMapping(itersRegion);
@@ -2510,15 +2482,14 @@ buildTaskAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
};
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
- ompBuilder->createIteratorLoop(loc, iterInfo.getTotalTrips(), bodyGen,
- /*Name=*/"iterator");
+ moduleTranslation.getOpenMPBuilder()->createIteratorLoop(
+ loc, iterInfo.getTotalTrips(), bodyGen,
+ /*Name=*/"iterator");
if (!afterIP)
return itersOp.emitOpError() << llvm::toString(afterIP.takeError());
builder.restoreIP(*afterIP);
- ad.Info = list;
- ad.Count = builder.CreateTrunc(iterInfo.getTotalTrips(), builder.getInt32Ty());
return mlir::success();
}
@@ -2526,21 +2497,43 @@ static mlir::LogicalResult buildAffinityData(
mlir::omp::TaskOp &taskOp, llvm::IRBuilderBase &builder,
mlir::LLVM::ModuleTranslation &moduleTranslation,
llvm::SmallVectorImpl<llvm::OpenMPIRBuilder::AffinityData> &ads) {
- using AffinityData = llvm::OpenMPIRBuilder::AffinityData;
+ auto &ctx = builder.getContext();
+ llvm::StructType *kmpTaskAffinityInfoTy = llvm::StructType::get(
+ llvm::Type::getInt64Ty(ctx), llvm::Type::getInt64Ty(ctx),
+ llvm::Type::getInt32Ty(ctx));
+
+ auto allocateAffinityList = [&](llvm::Value *count) -> llvm::Value * {
+ return builder.CreateAlloca(kmpTaskAffinityInfoTy, count,
+ "omp.affinity_list");
+ };
+
+ auto createAffinity =
+ [&](llvm::Value *count,
+ llvm::Value *info) -> llvm::OpenMPIRBuilder::AffinityData {
+ llvm::OpenMPIRBuilder::AffinityData ad{};
+ ad.Count = builder.CreateTrunc(count, builder.getInt32Ty());
+ ad.Info =
+ builder.CreatePointerBitCastOrAddrSpaceCast(info, builder.getPtrTy(0));
+ return ad;
+ };
if (!taskOp.getAffinityVars().empty()) {
- AffinityData ad;
- buildTaskAffinityList(taskOp, builder, moduleTranslation, ad);
- ads.emplace_back(ad);
+ llvm::Value *count = llvm::ConstantInt::get(
+ builder.getInt64Ty(), taskOp.getAffinityVars().size());
+ llvm::Value *list = allocateAffinityList(count);
+ fillAffinityLocators(taskOp, builder, moduleTranslation, list);
+ ads.emplace_back(createAffinity(count, list));
}
+
if (!taskOp.getIterated().empty()) {
for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
- auto iterOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
- AffinityData ad;
- if (failed(buildTaskAffinityIteratorLoop(iterOp, builder,
- moduleTranslation, ad)))
+ auto itersOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+ IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+ llvm::Value *list = allocateAffinityList(iterInfo.getTotalTrips());
+ if (failed(fillAffinityIteratorLoop(itersOp, builder, moduleTranslation,
+ list, iterInfo)))
return llvm::failure();
- ads.emplace_back(ad);
+ ads.emplace_back(createAffinity(iterInfo.getTotalTrips(), list));
}
}
return mlir::success();
>From c791eabd8a09d98585125b7ae1efd9c8e47c3cfb Mon Sep 17 00:00:00 2001
From: cchen <chichun.chen at hpe.com>
Date: Wed, 25 Feb 2026 14:08:13 -0600
Subject: [PATCH 11/11] Fix iteratorop
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index aa4fa2c9ca284..87981fb3af4b0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2252,10 +2252,10 @@ class IteratorInfo {
}
public:
- IteratorInfo(mlir::omp::IteratorsOp itersOp,
+ IteratorInfo(mlir::omp::IteratorOp itersOp,
mlir::LLVM::ModuleTranslation &moduleTranslation,
llvm::IRBuilderBase &builder) {
- dims = itersOp.getLbs().size();
+ dims = itersOp.getLoopLowerBounds().size();
this->lowerBounds.resize(dims);
this->upperBounds.resize(dims);
this->steps.resize(dims);
@@ -2263,13 +2263,13 @@ class IteratorInfo {
for (unsigned d = 0; d < dims; ++d) {
llvm::Value *lb =
- lookUpAsI64(itersOp.getLbs()[d], moduleTranslation, builder);
+ lookUpAsI64(itersOp.getLoopLowerBounds()[d], moduleTranslation, builder);
llvm::Value *ub =
- lookUpAsI64(itersOp.getUbs()[d], moduleTranslation, builder);
+ lookUpAsI64(itersOp.getLoopUpperBounds()[d], moduleTranslation, builder);
llvm::Value *st =
- lookUpAsI64(itersOp.getSteps()[d], moduleTranslation, builder);
+ lookUpAsI64(itersOp.getLoopSteps()[d], moduleTranslation, builder);
assert(lb && ub && st &&
- "Expect lowerBounds, upperBounds, and steps in IteratorsOp");
+ "Expect lowerBounds, upperBounds, and steps in IteratorOp");
this->lowerBounds[d] = lb;
this->upperBounds[d] = ub;
@@ -2442,7 +2442,7 @@ convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
}
static mlir::LogicalResult
-fillAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
+fillAffinityIteratorLoop(mlir::omp::IteratorOp itersOp,
llvm::IRBuilderBase &builder,
mlir::LLVM::ModuleTranslation &moduleTranslation,
llvm::Value *affinityList, IteratorInfo &iterInfo) {
@@ -2460,7 +2460,7 @@ fillAffinityIteratorLoop(mlir::omp::IteratorsOp itersOp,
if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock,
builder, moduleTranslation))) {
return llvm::make_error<llvm::StringError>(
- "failed to convert iterators region", llvm::inconvertibleErrorCode());
+ "failed to convert iterator region", llvm::inconvertibleErrorCode());
}
// Extract affinity entry from omp.yield and store into list[linearIV].
@@ -2527,7 +2527,7 @@ static mlir::LogicalResult buildAffinityData(
if (!taskOp.getIterated().empty()) {
for (size_t i = 0; i < taskOp.getIterated().size(); ++i) {
- auto itersOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorsOp>();
+ auto itersOp = taskOp.getIterated()[i].getDefiningOp<omp::IteratorOp>();
IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
llvm::Value *list = allocateAffinityList(iterInfo.getTotalTrips());
if (failed(fillAffinityIteratorLoop(itersOp, builder, moduleTranslation,
@@ -7525,7 +7525,7 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
return convertOmpLoopNest(*op, builder, moduleTranslation);
})
.Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
- omp::AffinityEntryOp, omp::IteratorsOp>([&](auto op) {
+ omp::AffinityEntryOp, omp::IteratorOp>([&](auto op) {
// No-op, should be handled by relevant owning operations e.g.
// TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
// etc. and then discarded
More information about the llvm-branch-commits
mailing list