[Mlir-commits] [mlir] 2ad51ff - [mlir][llvmir][OpenMP] Translate affinity clause in task construct to llvmir (#182223)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Mar 16 08:16:48 PDT 2026
Author: Chi-Chun, Chen
Date: 2026-03-16T10:16:38-05:00
New Revision: 2ad51ffbfae77ecb67e64ed8e7e833e54285f4c2
URL: https://github.com/llvm/llvm-project/commit/2ad51ffbfae77ecb67e64ed8e7e833e54285f4c2
DIFF: https://github.com/llvm/llvm-project/commit/2ad51ffbfae77ecb67e64ed8e7e833e54285f4c2.diff
LOG: [mlir][llvmir][OpenMP] Translate affinity clause in task construct to llvmir (#182223)
Translate affinity entries to LLVMIR by passing affinity information to
createTask (__kmpc_omp_reg_task_with_affinity is created inside
PostOutlineCB).
3/3 in stack for implementing affinity clause with iterator modifier
1/3 #182218
2/3 #182222
3/3 #182223
Added:
mlir/test/Target/LLVMIR/openmp-iterator.mlir
Modified:
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
mlir/test/Dialect/OpenMP/invalid.mlir
mlir/test/Dialect/OpenMP/ops.mlir
mlir/test/Target/LLVMIR/openmp-llvm.mlir
mlir/test/Target/LLVMIR/openmp-todo.mlir
openmp/runtime/src/kmp_tasking.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 9885ffc8b2065..c36d721b567e5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1510,6 +1510,16 @@ class OpenMPIRBuilder {
: DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
};
+ /// Return the LLVM struct type matching runtime `kmp_task_affinity_info_t`.
+ /// `{ kmp_intptr_t base_addr; size_t len; flags (bitfield storage as i32) }`
+ LLVM_ABI llvm::StructType *getKmpTaskAffinityInfoTy();
+
+  /// A struct to pack the relevant information for an OpenMP affinity clause.
+  ///
+  /// Both members default to null. createTask() only emits the
+  /// `__kmpc_omp_reg_task_with_affinity` call when both are non-null, so a
+  /// default-constructed object means "no affinity clause".
+  struct AffinityData {
+    /// Number of `kmp_task_affinity_info_t` entries reachable through \c Info.
+    Value *Count = nullptr;
+    /// Pointer to the first `kmp_task_affinity_info_t` entry.
+    Value *Info = nullptr;
+  };
+
/// Generator for `#omp taskloop`
///
/// \param Loc The location where the taskloop construct was encountered.
@@ -1568,17 +1578,21 @@ class OpenMPIRBuilder {
/// cannot be resumed until execution of the structured
/// block that is associated with the generated task is
/// completed.
+ /// \param Dependencies Vector of DependData objects holding information of
+ /// dependencies as specified by the 'depend' clause.
+ /// \param Affinities AffinityData object holding information of accumulated
+ /// affinities as specified by the 'affinity' clause.
/// \param EventHandle If present, signifies the event handle as part of
/// the detach clause
/// \param Mergeable If the given task is `mergeable`
/// \param priority `priority-value' specifies the execution order of the
/// tasks that is generated by the construct
- LLVM_ABI InsertPointOrErrorTy
- createTask(const LocationDescription &Loc, InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB, bool Tied = true,
- Value *Final = nullptr, Value *IfCondition = nullptr,
- SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
- Value *EventHandle = nullptr, Value *Priority = nullptr);
+ LLVM_ABI InsertPointOrErrorTy createTask(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr,
+ Value *IfCondition = nullptr, SmallVector<DependData> Dependencies = {},
+ AffinityData Affinities = {}, bool Mergeable = false,
+ Value *EventHandle = nullptr, Value *Priority = nullptr);
/// Generator for the taskgroup construct
///
@@ -3926,6 +3940,39 @@ class OpenMPIRBuilder {
LLVM_ABI GlobalVariable *
getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
std::optional<unsigned> AddressSpace = {});
+
+ /// Callback used by createIteratorLoop() to emit the loop body. It receives
+ /// the insertion point of the loop body and the loop's induction variable,
+ /// and reports failure through the returned llvm::Error.
+ using IteratorBodyGenTy = llvm::function_ref<llvm::Error(
+ InsertPointTy BodyIP, llvm::Value *LinearIV)>;
+
+ /// Create a canonical iterator loop at the current insertion point.
+ ///
+ /// This helper splits the current block and builds a canonical loop
+ /// using createLoopSkeleton(). The resulting control flow looks like:
+ ///
+ /// CurBB -> Preheader -> Header -> Body -> Latch -> After -> ContBB
+ ///
+ /// The body of the loop is produced by calling \p BodyGen with the insertion
+ /// point for the loop body and the induction variable.
+ /// Unlike createCanonicalLoop(), this function is intended for \p BodyGen
+ /// that may perform region lowering (e.g., translating MLIR regions) and are
+ /// not guaranteed to preserve the canonical skeleton's body terminator. In
+ /// particular:
+ ///
+ /// - The skeleton's unconditional branch from the loop body is removed
+ /// before invoking \p BodyGen.
+ /// - \p BodyGen may freely emit instructions and temporarily introduce
+ /// control flow.
+ /// - If the loop body does not end with a terminator after \p BodyGen
+ /// returns, a branch to the latch is inserted to restore canonical form.
+ /// - If the loop body does end with a terminator after \p BodyGen returns,
+ /// that terminator must be an unconditional branch to the loop latch;
+ /// anything else is reported as an error.
+ ///
+ /// \param Loc The location where the iterator modifier was encountered.
+ /// \param TripCount Number of loop iterations.
+ /// \param BodyGen Callback to generate the loop body.
+ /// \param Name Base name used for creating the loop
+ /// \returns The insertion position *after* the iterator loop, or an error
+ /// if \p BodyGen fails or leaves the loop body with an unexpected
+ /// terminator.
+ LLVM_ABI InsertPointOrErrorTy createIteratorLoop(
+ LocationDescription Loc, llvm::Value *TripCount,
+ IteratorBodyGenTy BodyGen, llvm::StringRef Name = "iterator");
};
/// Class to represented the control flow structure of an OpenMP canonical loop.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index aa001fbf8c4d2..85ecec046cfdb 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2431,11 +2431,18 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
return Builder.saveIP();
}
+/// Build the struct type `{ intptr, intptr, i32 }`, where `intptr` is an
+/// integer as wide as a pointer in the module's data layout and the trailing
+/// i32 is the storage for the flags field.
+llvm::StructType *OpenMPIRBuilder::getKmpTaskAffinityInfoTy() {
+  llvm::LLVMContext &Ctx = M.getContext();
+  const unsigned PtrBits = M.getDataLayout().getPointerSizeInBits();
+  llvm::Type *PtrSizedIntTy = llvm::Type::getIntNTy(Ctx, PtrBits);
+  llvm::Type *FlagsTy = llvm::Type::getInt32Ty(Ctx);
+  return llvm::StructType::get(PtrSizedIntTy, PtrSizedIntTy, FlagsTy);
+}
+
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
- SmallVector<DependData> Dependencies, bool Mergeable, Value *EventHandle,
- Value *Priority) {
+ SmallVector<DependData> Dependencies, AffinityData Affinities,
+ bool Mergeable, Value *EventHandle, Value *Priority) {
if (!updateToLocation(Loc))
return InsertPointTy();
@@ -2481,8 +2488,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
- Mergeable, Priority, EventHandle, TaskAllocaBB,
- ToBeDeleted](Function &OutlinedFn) mutable {
+ Affinities, Mergeable, Priority, EventHandle,
+ TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable {
// Replace the Stale CI by appropriate RTL function call.
assert(OutlinedFn.hasOneUse() &&
"there must be a single user for the outlined function");
@@ -2555,6 +2562,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
/*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
/*task_func=*/&OutlinedFn});
+ if (Affinities.Count && Affinities.Info) {
+ Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
+ OMPRTL___kmpc_omp_reg_task_with_affinity);
+
+ createRuntimeFunctionCall(RegAffFn, {Ident, ThreadID, TaskData,
+ Affinities.Count, Affinities.Info});
+ }
+
// Emit detach clause initialization.
// evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
// task_descriptor);
@@ -11573,6 +11588,65 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(vfs::FileSystem &VFS,
loadOffloadInfoMetadata(*M.get());
}
+/// Emit a canonical loop with \p TripCount iterations at \p Loc and let
+/// \p BodyGen fill in its body. On success the returned insertion point is
+/// the beginning of the continuation block ("omp.it.cont"); otherwise the
+/// error from \p BodyGen, or the body-terminator error below, is returned.
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createIteratorLoop(
+ LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen,
+ llvm::StringRef Name) {
+ Builder.restoreIP(Loc.IP);
+
+ BasicBlock *CurBB = Builder.GetInsertBlock();
+ assert(CurBB &&
+ "expected a valid insertion block for creating an iterator loop");
+ Function *F = CurBB->getParent();
+
+ // When inserting at the end of a block that already has a terminator,
+ // split in front of that terminator instead of after it (presumably so the
+ // existing terminator ends up in the continuation block).
+ InsertPointTy SplitIP = Builder.saveIP();
+ if (SplitIP.getPoint() == CurBB->end())
+ if (Instruction *Terminator = CurBB->getTerminator())
+ SplitIP = InsertPointTy(CurBB, Terminator->getIterator());
+
+ // Everything from the split point onwards becomes the continuation block.
+ // No branch is created here; CurBB is wired to the loop preheader below.
+ BasicBlock *ContBB =
+ splitBB(SplitIP, /*CreateBranch=*/false,
+ Builder.getCurrentDebugLocation(), "omp.it.cont");
+
+ // Create the loop skeleton; its blocks are inserted before ContBB.
+ CanonicalLoopInfo *CLI =
+ createLoopSkeleton(Builder.getCurrentDebugLocation(), TripCount, F,
+ /*PreInsertBefore=*/ContBB,
+ /*PostInsertBefore=*/ContBB, Name);
+
+ // Enter loop from original block.
+ redirectTo(CurBB, CLI->getPreheader(), Builder.getCurrentDebugLocation());
+
+ // Remove the unconditional branch inserted by createLoopSkeleton in the body
+ // so that BodyGen starts from an unterminated block.
+ if (Instruction *T = CLI->getBody()->getTerminator())
+ T->eraseFromParent();
+
+ // Let the callback emit the body; propagate its error unchanged.
+ InsertPointTy BodyIP = CLI->getBodyIP();
+ if (llvm::Error Err = BodyGen(BodyIP, CLI->getIndVar()))
+ return Err;
+
+ // Body must either fallthrough to the latch or branch directly to it.
+ // Any other terminator left in the body block is rejected.
+ if (Instruction *BodyTerminator = CLI->getBody()->getTerminator()) {
+ auto *BodyBr = dyn_cast<BranchInst>(BodyTerminator);
+ if (!BodyBr || !BodyBr->isUnconditional() ||
+ BodyBr->getSuccessor(0) != CLI->getLatch()) {
+ return make_error<StringError>(
+ "iterator bodygen must terminate the canonical body with an "
+ "unconditional branch to the loop latch",
+ inconvertibleErrorCode());
+ }
+ } else {
+ // Ensure we end the loop body by jumping to the latch.
+ Builder.SetInsertPoint(CLI->getBody());
+ Builder.CreateBr(CLI->getLatch());
+ }
+
+ // Link After -> ContBB, unless the after block already has a terminator.
+ Builder.SetInsertPoint(CLI->getAfter(), CLI->getAfter()->begin());
+ if (!CLI->getAfter()->getTerminator())
+ Builder.CreateBr(ContBB);
+
+ // Callers continue emitting code at the start of the continuation block.
+ return InsertPointTy{ContBB, ContBB->begin()};
+}
+
//===----------------------------------------------------------------------===//
// OffloadEntriesInfoManager
//===----------------------------------------------------------------------===//
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 5d7ecbce73750..eab4f88c7fbf7 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -7555,6 +7555,186 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
EXPECT_EQ(OulinedFnCall->getNextNode(), TaskCompleteCall);
}
+// Checks that createTask() with affinity data emits a call to
+// __kmpc_omp_reg_task_with_affinity with the expected argument shape.
+TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  // The task body is intentionally empty.
+  auto EmptyBodyCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
+
+  // Model the affinity record as { i64, i64, i32 }.
+  LLVMContext &Ctx = M->getContext();
+  Type *I64Ty = Type::getInt64Ty(Ctx);
+  StructType *AffInfoTy =
+      StructType::get(I64Ty, I64Ty, Type::getInt32Ty(Ctx));
+
+  // A one-element affinity list allocated on the stack.
+  Value *NumAffinities = Builder.getInt32(1);
+  AllocaInst *AffinityList =
+      Builder.CreateAlloca(AffInfoTy, Builder.getInt64(1), "omp.affinity_list");
+
+  // Initialize every field of entry 0 so the pointer definitely dominates
+  // its use.
+  Value *FirstEntry = Builder.CreateInBoundsGEP(
+      AffInfoTy, AffinityList, Builder.getInt64(0), "omp.affinity.entry");
+  Value *FieldInits[] = {Builder.getInt64(0), Builder.getInt64(64),
+                         Builder.getInt32(0)};
+  for (unsigned Field = 0; Field < 3; ++Field) {
+    Value *FieldPtr = Builder.CreateStructGEP(AffInfoTy, FirstEntry, Field);
+    Builder.CreateStore(FieldInits[Field], FieldPtr);
+  }
+
+  OpenMPIRBuilder::AffinityData Affinity{NumAffinities, AffinityList};
+
+  BasicBlock *AllocaBB = Builder.GetInsertBlock();
+  BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
+  OpenMPIRBuilder::LocationDescription Loc(
+      InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
+  InsertPointTy AllocaIP(AllocaBB, AllocaBB->getFirstInsertionPt());
+
+  ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
+                       OMPBuilder.createTask(Loc, AllocaIP, EmptyBodyCB,
+                                             /*Tied=*/true,
+                                             /*Final=*/nullptr,
+                                             /*IfCondition=*/nullptr,
+                                             /*Dependencies=*/{},
+                                             /*Affinity=*/Affinity,
+                                             /*Mergeable=*/false,
+                                             /*EventHandle=*/nullptr,
+                                             /*Priority=*/nullptr));
+
+  Builder.restoreIP(AfterIP);
+  OMPBuilder.finalize();
+  Builder.CreateRetVoid();
+
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+
+  Function *TaskAllocFn =
+      OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+  Function *RegAffFn = OMPBuilder.getOrCreateRuntimeFunctionPtr(
+      OMPRTL___kmpc_omp_reg_task_with_affinity);
+
+  // Returns the last direct call to Callee found in F, or nullptr.
+  auto FindLastCallTo = [&](Function *Callee) -> CallInst * {
+    CallInst *Found = nullptr;
+    for (Instruction &I : instructions(F))
+      if (auto *Call = dyn_cast<CallInst>(&I))
+        if (Call->getCalledFunction() == Callee)
+          Found = Call;
+    return Found;
+  };
+
+  CallInst *TaskAllocCI = FindLastCallTo(TaskAllocFn);
+  CallInst *RegAffCI = FindLastCallTo(RegAffFn);
+
+  ASSERT_NE(TaskAllocCI, nullptr) << "expected __kmpc_omp_task_alloc call";
+  ASSERT_NE(RegAffCI, nullptr)
+      << "expected __kmpc_omp_reg_task_with_affinity call";
+
+  // Expected call shape:
+  //   i32 __kmpc_omp_reg_task_with_affinity(ident_t*, i32 gtid,
+  //                                         kmp_task_t*, i32 naffins,
+  //                                         kmp_task_affinity_info_t*)
+  ASSERT_EQ(RegAffCI->arg_size(), 5u);
+  Value *NAffinsArg = RegAffCI->getArgOperand(3);
+  Value *AffinListArg = RegAffCI->getArgOperand(4);
+  EXPECT_TRUE(NAffinsArg->getType()->isIntegerTy(32));
+  EXPECT_TRUE(AffinListArg->getType()->isPointerTy());
+}
+
+// Exercises createIteratorLoop() when the insertion block is still
+// unterminated and when it already ends in a branch.
+TEST_F(OpenMPIRBuilderTest, CreateIteratorLoop) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+  // Case 1: insert into a block that has no terminator yet.
+  {
+    OpenMPIRBuilder OMPBuilder(*M);
+    OMPBuilder.initialize();
+    F->setName("func.unterminated");
+    IRBuilder<> Builder(BB);
+
+    // Loop body: a single add on the induction variable, no terminator.
+    auto EmitBody = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error {
+      Builder.restoreIP(BodyIP);
+      Builder.CreateAdd(LinearIV, Builder.getInt64(1));
+      return Error::success();
+    };
+
+    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
+    Value *TripCount = Builder.getInt64(4);
+    ASSERT_EXPECTED_INIT(
+        InsertPointTy, ContIP,
+        OMPBuilder.createIteratorLoop(Loc, TripCount, EmitBody, "iterator"));
+
+    Builder.restoreIP(ContIP);
+    Builder.CreateRetVoid();
+
+    EXPECT_EQ(ContIP.getBlock()->getName(), "omp.it.cont");
+    EXPECT_FALSE(verifyFunction(*F, &errs()));
+  }
+
+  // Case 2: insert at the end of a block that already branches elsewhere.
+  {
+    Function *TerminatedFn =
+        Function::Create(F->getFunctionType(), Function::ExternalLinkage,
+                         "func.terminated", M.get());
+    BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", TerminatedFn);
+    OpenMPIRBuilder OMPBuilder(*M);
+    OMPBuilder.initialize();
+    IRBuilder<> Builder(EntryBB);
+
+    BasicBlock *OrigSucc =
+        BasicBlock::Create(Builder.getContext(), "orig.succ", TerminatedFn);
+    Builder.CreateBr(OrigSucc);
+
+    auto EmitBody = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error {
+      Builder.restoreIP(BodyIP);
+      Builder.CreateAdd(LinearIV, Builder.getInt64(1));
+      return Error::success();
+    };
+
+    InsertPointTy EndOfEntry(EntryBB, EntryBB->end());
+    OpenMPIRBuilder::LocationDescription Loc(EndOfEntry, DL);
+    Value *TripCount = Builder.getInt64(4);
+    ASSERT_EXPECTED_INIT(
+        InsertPointTy, ContIP,
+        OMPBuilder.createIteratorLoop(Loc, TripCount, EmitBody, "iterator"));
+
+    // The continuation block must end with the original branch.
+    BasicBlock *ContBB = ContIP.getBlock();
+    EXPECT_EQ(ContBB->getName(), "omp.it.cont");
+    auto *ContBr = dyn_cast<BranchInst>(ContBB->getTerminator());
+    ASSERT_NE(ContBr, nullptr);
+    ASSERT_FALSE(ContBr->isConditional());
+    EXPECT_EQ(ContBr->getSuccessor(0), OrigSucc);
+
+    Builder.SetInsertPoint(OrigSucc);
+    Builder.CreateRetVoid();
+
+    EXPECT_FALSE(verifyFunction(*TerminatedFn, &errs()));
+  }
+
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+}
+
+// A body generator that terminates the loop body with a branch to a block
+// other than the latch must make createIteratorLoop() fail.
+TEST_F(OpenMPIRBuilderTest, CreateIteratorLoopInvalidLoopBody) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  auto EmitBadBody = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error {
+    Builder.restoreIP(BodyIP);
+    Builder.CreateAdd(LinearIV, Builder.getInt64(1));
+
+    // Leave the body through a branch that does not target the latch.
+    BasicBlock *WrongTarget =
+        BasicBlock::Create(Builder.getContext(), "iterator.bad.dest", F);
+    Builder.CreateBr(WrongTarget);
+    Builder.SetInsertPoint(WrongTarget);
+    Builder.CreateUnreachable();
+    return Error::success();
+  };
+
+  OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
+  Value *TripCount = Builder.getInt64(4);
+  OpenMPIRBuilder::InsertPointOrErrorTy Result =
+      OMPBuilder.createIteratorLoop(Loc, TripCount, EmitBadBody, "iterator");
+  ASSERT_TRUE(errorToBool(Result.takeError()));
+}
+
TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 7fdc23adc8573..d90912f9f686f 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -154,6 +154,9 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
// discarded on lowering to LLVM-IR from the OpenMP dialect.
converter.addConversion(
[&](omp::MapBoundsType type) -> Type { return type; });
+ converter.addConversion(
+ [&](omp::AffinityEntryType type) -> Type { return type; });
+ converter.addConversion([&](omp::IteratedType type) -> Type { return type; });
// Add conversions for all OpenMP operations.
addOpenMPOpConversions<
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index e0559e850faf6..7cab929d583ca 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -18,6 +18,7 @@
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/DialectImplementation.h"
+#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/SymbolTable.h"
@@ -4796,6 +4797,30 @@ LogicalResult IteratorOp::verify() {
if (!iteratedTy)
return emitOpError() << "result must be omp.iterated<entry_ty>";
+ for (auto [lb, ub, step] : llvm::zip_equal(
+ getLoopLowerBounds(), getLoopUpperBounds(), getLoopSteps())) {
+ if (matchPattern(step, m_Zero()))
+ return emitOpError() << "loop step must not be zero";
+
+ IntegerAttr lbAttr;
+ IntegerAttr ubAttr;
+ IntegerAttr stepAttr;
+ if (!matchPattern(lb, m_Constant(&lbAttr)) ||
+ !matchPattern(ub, m_Constant(&ubAttr)) ||
+ !matchPattern(step, m_Constant(&stepAttr)))
+ continue;
+
+ const APInt &lbVal = lbAttr.getValue();
+ const APInt &ubVal = ubAttr.getValue();
+ const APInt &stepVal = stepAttr.getValue();
+ if (stepVal.isStrictlyPositive() && lbVal.sgt(ubVal))
+ return emitOpError() << "positive loop step requires lower bound to be "
+ "less than or equal to upper bound";
+ if (stepVal.isNegative() && lbVal.slt(ubVal))
+ return emitOpError() << "negative loop step requires lower bound to be "
+ "greater than or equal to upper bound";
+ }
+
Block &b = getRegion().front();
auto yield = llvm::dyn_cast<omp::YieldOp>(b.getTerminator());
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9d7c0003c2336..37b1a37c2e1a5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -321,10 +321,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
<< " operation";
};
- auto checkAffinity = [&todo](auto op, LogicalResult &result) {
- if (!op.getAffinityVars().empty())
- result = todo("affinity");
- };
auto checkAllocate = [&todo](auto op, LogicalResult &result) {
if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
result = todo("allocate");
@@ -408,7 +404,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkThreadLimit(op, result);
})
.Case([&](omp::TaskOp op) {
- checkAffinity(op, result);
checkAllocate(op, result);
checkInReduction(op, result);
})
@@ -2233,6 +2228,81 @@ class TaskContextStructManager {
/// The type of the structure
llvm::Type *structTy = nullptr;
};
+
+/// IteratorInfo extracts and prepares loop bounds information from an
+/// mlir::omp::IteratorOp for lowering to LLVM IR.
+///
+/// It computes the per-dimension trip counts and the total linearized trip
+/// count, casted to i64. These are used to build a canonical loop and to
+/// reconstruct the physical induction variables inside the loop body.
+///
+/// Note that constructing an IteratorInfo emits instructions at the current
+/// insertion point of the builder passed to the constructor.
+class IteratorInfo {
+private:
+  llvm::SmallVector<llvm::Value *> lowerBounds;
+  llvm::SmallVector<llvm::Value *> upperBounds;
+  llvm::SmallVector<llvm::Value *> steps;
+  llvm::SmallVector<llvm::Value *> trips;
+  unsigned dims = 0;
+  llvm::Value *totalTrips = nullptr;
+
+  /// Look up the LLVM value mapped to \p val and return it as an i64.
+  ///
+  /// i64 values are returned unchanged, other integer widths are
+  /// sign-extended or truncated to i64. Returns nullptr if \p val has no
+  /// mapping or the mapped value is not an integer.
+  static llvm::Value *lookUpAsI64(mlir::Value val,
+                                  const LLVM::ModuleTranslation &mt,
+                                  llvm::IRBuilderBase &builder) {
+    llvm::Value *v = mt.lookupValue(val);
+    if (!v)
+      return nullptr;
+    if (v->getType()->isIntegerTy(64))
+      return v;
+    if (v->getType()->isIntegerTy())
+      return builder.CreateSExtOrTrunc(v, builder.getInt64Ty());
+    return nullptr;
+  }
+
+public:
+  /// Collect the bounds and steps of \p itersOp as i64 values and emit the
+  /// per-dimension and total trip-count computations using \p builder.
+  IteratorInfo(mlir::omp::IteratorOp itersOp,
+               mlir::LLVM::ModuleTranslation &moduleTranslation,
+               llvm::IRBuilderBase &builder) {
+    dims = itersOp.getLoopLowerBounds().size();
+    lowerBounds.resize(dims);
+    upperBounds.resize(dims);
+    steps.resize(dims);
+    trips.resize(dims);
+
+    for (unsigned d = 0; d < dims; ++d) {
+      llvm::Value *lb = lookUpAsI64(itersOp.getLoopLowerBounds()[d],
+                                    moduleTranslation, builder);
+      llvm::Value *ub = lookUpAsI64(itersOp.getLoopUpperBounds()[d],
+                                    moduleTranslation, builder);
+      llvm::Value *st =
+          lookUpAsI64(itersOp.getLoopSteps()[d], moduleTranslation, builder);
+      assert(lb && ub && st &&
+             "Expect lowerBounds, upperBounds, and steps in IteratorOp");
+      assert((!llvm::isa<llvm::ConstantInt>(st) ||
+              !llvm::cast<llvm::ConstantInt>(st)->isZero()) &&
+             "Expect non-zero step in IteratorOp");
+
+      lowerBounds[d] = lb;
+      upperBounds[d] = ub;
+      steps[d] = st;
+
+      // trips = ((ub - lb) / step) + 1 (inclusive ub, assume positive step)
+      llvm::Value *diff = builder.CreateSub(ub, lb);
+      llvm::Value *div = builder.CreateSDiv(diff, st);
+      trips[d] = builder.CreateAdd(
+          div, llvm::ConstantInt::get(builder.getInt64Ty(), 1));
+    }
+
+    // The linearized trip count is the product of all per-dimension counts.
+    totalTrips = llvm::ConstantInt::get(builder.getInt64Ty(), 1);
+    for (unsigned d = 0; d < dims; ++d)
+      totalTrips = builder.CreateMul(totalTrips, trips[d]);
+  }
+
+  unsigned getDims() const { return dims; }
+  llvm::ArrayRef<llvm::Value *> getLowerBounds() const { return lowerBounds; }
+  llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
+  llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
+  llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
+  llvm::Value *getTotalTrips() const { return totalTrips; }
+};
+
} // namespace
void TaskContextStructManager::generateTaskContextStruct() {
@@ -2307,6 +2377,235 @@ void TaskContextStructManager::freeStructPtr() {
builder.CreateFree(structPtr);
}
+/// Store one affinity record into element \p index of \p affinityList.
+///
+/// \p addr is converted to the integer type of the record's first field and
+/// \p len is zero-extended or truncated to the type of its second field. The
+/// flags field is always written as 0.
+static void storeAffinityEntry(llvm::IRBuilderBase &builder,
+                               llvm::OpenMPIRBuilder &ompBuilder,
+                               llvm::Value *affinityList, llvm::Value *index,
+                               llvm::Value *addr, llvm::Value *len) {
+  llvm::StructType *infoTy = ompBuilder.getKmpTaskAffinityInfoTy();
+  llvm::Value *slot = builder.CreateInBoundsGEP(infoTy, affinityList, index,
+                                                "omp.affinity.entry");
+
+  llvm::Value *baseAddr =
+      builder.CreatePtrToInt(addr, infoTy->getElementType(0));
+  llvm::Value *length = builder.CreateIntCast(len, infoTy->getElementType(1),
+                                              /*isSigned=*/false);
+  llvm::Value *flags = builder.getInt32(0);
+
+  // Write the fields in declaration order: base address, length, flags.
+  llvm::Value *fieldValues[] = {baseAddr, length, flags};
+  for (unsigned field = 0; field < 3; ++field) {
+    llvm::Value *fieldPtr = builder.CreateStructGEP(infoTy, slot, field);
+    builder.CreateStore(fieldValues[field], fieldPtr);
+  }
+}
+
+/// Store one affinity record per entry of \p affinityVars into
+/// \p affinityList, in operand order. Every operand is expected to be defined
+/// by an omp.affinity_entry operation.
+static void fillAffinityLocators(Operation::operand_range affinityVars,
+                                 llvm::IRBuilderBase &builder,
+                                 LLVM::ModuleTranslation &moduleTranslation,
+                                 llvm::Value *affinityList) {
+  uint64_t slot = 0;
+  for (mlir::Value affinityVar : affinityVars) {
+    auto entryOp = affinityVar.getDefiningOp<mlir::omp::AffinityEntryOp>();
+    assert(entryOp && "affinity item must be omp.affinity_entry");
+
+    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+    assert(addr && len && "expect affinity addr and len to be non-null");
+
+    llvm::Value *slotIndex = builder.getInt64(slot);
+    storeAffinityEntry(builder, *moduleTranslation.getOpenMPBuilder(),
+                       affinityList, slotIndex, addr, len);
+    ++slot;
+  }
+}
+
+/// Translate the body of an iterator region at the builder's current
+/// insertion point.
+///
+/// The linear induction variable \p linearIV is first decomposed, innermost
+/// dimension first, into one physical induction variable per dimension, and
+/// each is mapped to the matching block argument. The block is then
+/// converted with its arguments ignored.
+static mlir::LogicalResult
+convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
+                      mlir::Block &iteratorRegionBlock,
+                      llvm::IRBuilderBase &builder,
+                      LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::Value *remaining = linearIV;
+  for (unsigned d = iterInfo.getDims(); d-- > 0;) {
+    llvm::Value *tripCount = iterInfo.getTrips()[d];
+    // Index within dimension d, then strip that dimension off the linear IV.
+    llvm::Value *dimIndex = builder.CreateURem(remaining, tripCount);
+    remaining = builder.CreateUDiv(remaining, tripCount);
+
+    // physIV_d = lb_d + idx_d * step_d
+    llvm::Value *scaled = builder.CreateMul(dimIndex, iterInfo.getSteps()[d]);
+    llvm::Value *physIV = builder.CreateAdd(iterInfo.getLowerBounds()[d],
+                                            scaled, "omp.it.phys_iv");
+
+    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV);
+  }
+
+  // Translate the iterator region into the loop body.
+  moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
+  mlir::LogicalResult converted = moduleTranslation.convertBlock(
+      iteratorRegionBlock, /*ignoreArguments=*/true, builder);
+  if (mlir::succeeded(converted))
+    return mlir::success();
+  return mlir::failure();
+}
+
+/// Emit a loop over all iterations described by \p iterInfo that stores one
+/// affinity record per iteration into \p affinityList.
+///
+/// The region of \p itersOp is translated into the loop body; the affinity
+/// entry yielded by the region is stored at the index given by the loop's
+/// linear induction variable. On success the builder is left positioned
+/// after the loop.
+static mlir::LogicalResult
+fillAffinityIteratorLoop(mlir::omp::IteratorOp itersOp,
+                         llvm::IRBuilderBase &builder,
+                         mlir::LLVM::ModuleTranslation &moduleTranslation,
+                         llvm::Value *affinityList, IteratorInfo &iterInfo) {
+  mlir::Region &itersRegion = itersOp.getRegion();
+  mlir::Block &iteratorRegionBlock = itersRegion.front();
+
+  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
+
+  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy bodyIP,
+                     llvm::Value *linearIV) -> llvm::Error {
+    // Restore the caller's insertion point when the body has been emitted.
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.restoreIP(bodyIP);
+
+    if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock,
+                                     builder, moduleTranslation))) {
+      return llvm::make_error<llvm::StringError>(
+          "failed to convert iterator region", llvm::inconvertibleErrorCode());
+    }
+
+    // Extract affinity entry from omp.yield and store into list[linearIV].
+    auto yield =
+        mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
+    assert(yield && yield.getResults().size() == 1 &&
+           "expect omp.yield in iterator region to have one result");
+    auto entryOp =
+        yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
+    assert(entryOp && "expect yield generate an affinity entry");
+
+    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+    // Same precondition as the non-iterator path in fillAffinityLocators.
+    assert(addr && len && "expect affinity addr and len to be non-null");
+    storeAffinityEntry(builder, *moduleTranslation.getOpenMPBuilder(),
+                       affinityList, linearIV, addr, len);
+
+    // Iterator-region block/value mappings are temporary for this conversion,
+    // clear them to avoid stale entries in ModuleTranslation.
+    moduleTranslation.forgetMapping(itersRegion);
+
+    return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createIteratorLoop(
+          loc, iterInfo.getTotalTrips(), bodyGen,
+          /*Name=*/"iterator");
+  if (failed(handleError(afterIP, *itersOp)))
+    return failure();
+
+  // Continue emitting code after the loop.
+  builder.restoreIP(*afterIP);
+
+  return mlir::success();
+}
+
+/// Build the affinity list for \p taskOp and describe it in \p ad.
+///
+/// If the task has neither plain affinity entries nor iterator-generated
+/// ones, both fields of \p ad are set to null. Otherwise one list is built
+/// for the plain entries (if any) and one per iterator. When more than one
+/// list exists, they are copied back to back into a single packed list.
+/// \p ad then receives the total entry count (i32) and the list pointer.
+///
+/// \returns failure if lowering one of the iterator loops fails.
+static mlir::LogicalResult
+buildAffinityData(mlir::omp::TaskOp &taskOp, llvm::IRBuilderBase &builder,
+                  mlir::LLVM::ModuleTranslation &moduleTranslation,
+                  llvm::OpenMPIRBuilder::AffinityData &ad) {
+
+  if (taskOp.getAffinityVars().empty() && taskOp.getIterated().empty()) {
+    ad.Count = nullptr;
+    ad.Info = nullptr;
+    return mlir::success();
+  }
+
+  llvm::SmallVector<llvm::OpenMPIRBuilder::AffinityData> ads;
+  llvm::StructType *kmpTaskAffinityInfoTy =
+      moduleTranslation.getOpenMPBuilder()->getKmpTaskAffinityInfoTy();
+
+  // Allocate `count` affinity records. Counts that are constants or function
+  // arguments are allocated at the alloca insertion point; any other count
+  // is allocated at the current insertion point.
+  auto allocateAffinityList = [&](llvm::Value *count) -> llvm::Value * {
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    if (llvm::isa<llvm::Constant>(count) || llvm::isa<llvm::Argument>(count))
+      builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
+    return builder.CreateAlloca(kmpTaskAffinityInfoTy, count,
+                                "omp.affinity_list");
+  };
+
+  // Describe one list: the count truncated to i32 and the list pointer cast
+  // to a pointer in address space 0.
+  auto createAffinity =
+      [&](llvm::Value *count,
+          llvm::Value *info) -> llvm::OpenMPIRBuilder::AffinityData {
+    llvm::OpenMPIRBuilder::AffinityData entry{};
+    entry.Count = builder.CreateTrunc(count, builder.getInt32Ty());
+    entry.Info =
+        builder.CreatePointerBitCastOrAddrSpaceCast(info, builder.getPtrTy(0));
+    return entry;
+  };
+
+  // Plain (non-iterator) affinity entries share one statically sized list.
+  if (!taskOp.getAffinityVars().empty()) {
+    llvm::Value *count = llvm::ConstantInt::get(
+        builder.getInt64Ty(), taskOp.getAffinityVars().size());
+    llvm::Value *list = allocateAffinityList(count);
+    fillAffinityLocators(taskOp.getAffinityVars(), builder, moduleTranslation,
+                         list);
+    ads.emplace_back(createAffinity(count, list));
+  }
+
+  // Each iterator gets its own list, sized by its total trip count and
+  // filled by a generated loop.
+  for (mlir::Value iter : taskOp.getIterated()) {
+    auto itersOp = iter.getDefiningOp<omp::IteratorOp>();
+    assert(itersOp && "iterated value must be defined by omp.iterator");
+    IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
+    llvm::Value *affList = allocateAffinityList(iterInfo.getTotalTrips());
+    if (failed(fillAffinityIteratorLoop(itersOp, builder, moduleTranslation,
+                                        affList, iterInfo)))
+      return llvm::failure();
+    ads.emplace_back(createAffinity(iterInfo.getTotalTrips(), affList));
+  }
+
+  // Sum the entry counts of all lists.
+  llvm::Value *totalAffinityCount = builder.getInt32(0);
+  for (const auto &affinity : ads)
+    totalAffinityCount = builder.CreateAdd(
+        totalAffinityCount,
+        builder.CreateIntCast(affinity.Count, builder.getInt32Ty(),
+                              /*isSigned=*/false));
+
+  // A single list is passed on as is; several lists are packed into one.
+  llvm::Value *affinityInfo = ads.front().Info;
+  if (ads.size() > 1) {
+    llvm::Value *affinityInfoElemSize = builder.getInt64(
+        moduleTranslation.getLLVMModule()->getDataLayout().getTypeAllocSize(
+            kmpTaskAffinityInfoTy));
+
+    llvm::Value *packedAffinityInfo = allocateAffinityList(totalAffinityCount);
+    // Running element offset into the packed list.
+    llvm::Value *packedAffinityInfoOffset = builder.getInt32(0);
+    for (const auto &affinity : ads) {
+      llvm::Value *affinityCount = builder.CreateIntCast(
+          affinity.Count, builder.getInt32Ty(), /*isSigned=*/false);
+      llvm::Value *affinityCountInt64 = builder.CreateIntCast(
+          affinityCount, builder.getInt64Ty(), /*isSigned=*/false);
+      // Size in bytes of the list being copied.
+      llvm::Value *affinityInfoSize =
+          builder.CreateMul(affinityCountInt64, affinityInfoElemSize);
+
+      // Destination: &packed[offset].
+      llvm::Value *packedAffinityInfoIndex = builder.CreateIntCast(
+          packedAffinityInfoOffset, kmpTaskAffinityInfoTy->getElementType(0),
+          /*isSigned=*/false);
+      packedAffinityInfoIndex = builder.CreateInBoundsGEP(
+          kmpTaskAffinityInfoTy, packedAffinityInfo, packedAffinityInfoIndex);
+
+      builder.CreateMemCpy(
+          packedAffinityInfoIndex, llvm::Align(1),
+          builder.CreatePointerBitCastOrAddrSpaceCast(
+              affinity.Info, builder.getPtrTy(packedAffinityInfoIndex->getType()
+                                                  ->getPointerAddressSpace())),
+          llvm::Align(1), affinityInfoSize);
+
+      packedAffinityInfoOffset =
+          builder.CreateAdd(packedAffinityInfoOffset, affinityCount);
+    }
+
+    affinityInfo = packedAffinityInfo;
+  }
+
+  ad.Count = totalAffinityCount;
+  ad.Info = affinityInfo;
+
+  return mlir::success();
+}
+
/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -2421,6 +2720,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
taskOp.getPrivateNeedsBarrier())))
return llvm::failure();
+ llvm::OpenMPIRBuilder::AffinityData ad;
+ if (failed(buildAffinityData(taskOp, builder, moduleTranslation, ad)))
+ return llvm::failure();
+
// Set up for call to createTask()
builder.SetInsertPoint(taskStartBlock);
@@ -2524,7 +2827,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
moduleTranslation.getOpenMPBuilder()->createTask(
ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
moduleTranslation.lookupValue(taskOp.getFinal()),
- moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
+ moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad,
taskOp.getMergeable(),
moduleTranslation.lookupValue(taskOp.getEventHandle()),
moduleTranslation.lookupValue(taskOp.getPriority()));
@@ -7321,13 +7624,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::LoopNestOp) {
return convertOmpLoopNest(*op, builder, moduleTranslation);
})
- .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
- [&](auto op) {
- // No-op, should be handled by relevant owning operations e.g.
- // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
- // etc. and then discarded
- return success();
- })
+ .Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
+ omp::AffinityEntryOp, omp::IteratorOp>([&](auto op) {
+ // No-op, should be handled by relevant owning operations e.g.
+ // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp,
+ // TaskOp (affinity entries and iterators) etc. and then discarded
+ return success();
+ })
.Case([&](omp::NewCliOp op) {
// Meta-operation: Doesn't do anything by itself, but used to
// identify a loop.
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index bc508d66fbd5f..cbe18b9b882da 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -3180,6 +3180,48 @@ func.func @iterator_bad_result_type(%lb : index, %ub : index, %st : index) {
// -----
+func.func @iterator_zero_step(%s2 : !llvm.struct<(ptr, i64)>) {
+ %lb = arith.constant 1 : index
+ %ub = arith.constant 4 : index
+ %st = arith.constant 0 : index
+
+ // expected-error at +1 {{loop step must not be zero}}
+ %0 = omp.iterator(%iv: index) = (%lb to %ub step %st) {
+ omp.yield(%s2 : !llvm.struct<(ptr, i64)>)
+ } -> !omp.iterated<!llvm.struct<(ptr, i64)>>
+ return
+}
+
+// -----
+
+func.func @iterator_positive_step_wrong_direction(%s2 : !llvm.struct<(ptr, i64)>) {
+ %lb = arith.constant 1000 : index
+ %ub = arith.constant -1 : index
+ %st = arith.constant 10 : index
+
+ // expected-error at +1 {{positive loop step requires lower bound to be less than or equal to upper bound}}
+ %0 = omp.iterator(%iv: index) = (%lb to %ub step %st) {
+ omp.yield(%s2 : !llvm.struct<(ptr, i64)>)
+ } -> !omp.iterated<!llvm.struct<(ptr, i64)>>
+ return
+}
+
+// -----
+
+func.func @iterator_negative_step_wrong_direction(%s2 : !llvm.struct<(ptr, i64)>) {
+ %lb = arith.constant -1000 : index
+ %ub = arith.constant 4 : index
+ %st = arith.constant -999 : index
+
+ // expected-error at +1 {{negative loop step requires lower bound to be greater than or equal to upper bound}}
+ %0 = omp.iterator(%iv: index) = (%lb to %ub step %st) {
+ omp.yield(%s2 : !llvm.struct<(ptr, i64)>)
+ } -> !omp.iterated<!llvm.struct<(ptr, i64)>>
+ return
+}
+
+// -----
+
func.func @iterator_missing_yield(%lb : index, %ub : index, %st : index) {
// expected-error at +1 {{region must be terminated by omp.yield}}
%0 = omp.iterator(%i: index) = (%lb to %ub step %st) {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index b908874c2010b..ba329cc67bb14 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3601,6 +3601,24 @@ func.func @omp_iterator_2d(%s2 : !llvm.struct<(ptr, i64)>) -> () {
return
}
+// CHECK-LABEL: func.func @omp_iterator_negative_step
+func.func @omp_iterator_negative_step(%s2 : !llvm.struct<(ptr, i64)>) -> () {
+ // CHECK: %[[LB:.*]] = arith.constant 4 : index
+ // CHECK: %[[UB:.*]] = arith.constant 1 : index
+ // CHECK: %[[ST:.*]] = arith.constant -1 : index
+ // CHECK: %[[IT:.*]] = omp.iterator(%[[IV:.*]]: index) = (%[[LB]] to %[[UB]] step %[[ST]]) {
+ // CHECK: omp.yield(%{{.*}} : !llvm.struct<(ptr, i64)>)
+ // CHECK: } -> !omp.iterated<!llvm.struct<(ptr, i64)>>
+ %lb = arith.constant 4 : index
+ %ub = arith.constant 1 : index
+ %st = arith.constant -1 : index
+
+ %0 = omp.iterator(%iv: index) = (%lb to %ub step %st) {
+ omp.yield(%s2 : !llvm.struct<(ptr, i64)>)
+ } -> !omp.iterated<!llvm.struct<(ptr, i64)>>
+ return
+}
+
// CHECK-LABEL: func.func @omp_task_affinity_iterator_1d
func.func @omp_task_affinity_iterator_1d(%lb : index, %ub : index, %step : index,
%addr : !llvm.ptr, %len : i64) -> () {
diff --git a/mlir/test/Target/LLVMIR/openmp-iterator.mlir b/mlir/test/Target/LLVMIR/openmp-iterator.mlir
new file mode 100644
index 0000000000000..faadfbdc7202f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-iterator.mlir
@@ -0,0 +1,295 @@
+// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @task_affinity_iterator_1d(%arr: !llvm.ptr {llvm.nocapture}) {
+ %c1 = llvm.mlir.constant(1 : i64) : i64
+ %c4 = llvm.mlir.constant(4 : i64) : i64
+ %c6 = llvm.mlir.constant(6 : i64) : i64
+ %len = llvm.mlir.constant(4 : i64) : i64
+
+ omp.parallel {
+ omp.single {
+ %it = omp.iterator(%i: i64, %j: i64) =
+ (%c1 to %c4 step %c1, %c1 to %c6 step %c1) {
+ %entry = omp.affinity_entry %arr, %len
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+ omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_1d
+
+// Preheader -> Header
+// CHECK: omp_iterator.preheader:
+// CHECK: br label %omp_iterator.header
+//
+// Header has the IV phi and branches to cond
+// CHECK: omp_iterator.header:
+// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ]
+// CHECK: br label %omp_iterator.cond
+//
+// Cond: IV < 24 (the two iterator dimensions are flattened: 4 * 6 trips) and branches to body or exit
+// CHECK: omp_iterator.cond:
+// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 24
+// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
+//
+// Exit -> After -> continuation
+// CHECK: omp_iterator.exit:
+// CHECK: br label %omp_iterator.after
+// CHECK: omp_iterator.after:
+// CHECK: br label %omp.it.cont
+//
+// Body: store into affinity_list[IV] then branch to inc
+// CHECK: omp_iterator.body:
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]]
+// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
+// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
+// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
+// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP]]
+// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP]]
+// CHECK: br label %omp_iterator.inc
+//
+// CHECK: omp_iterator.inc:
+// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1
+// CHECK: br label %omp_iterator.header
+
+llvm.func @task_affinity_iterator_3d(%arr: !llvm.ptr {llvm.nocapture}) {
+ %c1 = llvm.mlir.constant(1 : i64) : i64
+ %c2 = llvm.mlir.constant(2 : i64) : i64
+ %c4 = llvm.mlir.constant(4 : i64) : i64
+ %c6 = llvm.mlir.constant(6 : i64) : i64
+ %len = llvm.mlir.constant(4 : i64) : i64
+
+ omp.parallel {
+ omp.single {
+ // 3-D iterator: i=1..4, j=1..6, k=1..2 => total trips = 48
+ %it = omp.iterator(%i: i64, %j: i64, %k: i64) =
+ (%c1 to %c4 step %c1, %c1 to %c6 step %c1, %c1 to %c2 step %c1) {
+ %entry = omp.affinity_entry %arr, %len
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+ omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_3d
+
+// Preheader -> Header
+// CHECK: omp_iterator.preheader:
+// CHECK: br label %omp_iterator.header
+//
+// Header has the IV phi and branches to cond
+// CHECK: omp_iterator.header:
+// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ]
+// CHECK: br label %omp_iterator.cond
+//
+// Cond: IV < 48 (the three iterator dimensions are flattened: 4 * 6 * 2 trips) and branches to body or exit
+// CHECK: omp_iterator.cond:
+// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 48
+// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
+//
+// Exit -> After -> continuation
+// CHECK: omp_iterator.exit:
+// CHECK: br label %omp_iterator.after
+// CHECK: omp_iterator.after:
+// CHECK: br label %omp.it.cont
+//
+// Body: store into affinity_list[IV] then branch to inc
+// CHECK: omp_iterator.body:
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]]
+// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
+// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
+// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
+// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP]]
+// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP]]
+// CHECK: br label %omp_iterator.inc
+//
+// CHECK: omp_iterator.inc:
+// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1
+// CHECK: br label %omp_iterator.header
+
+llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
+ %c1 = llvm.mlir.constant(1 : i64) : i64
+ %c3 = llvm.mlir.constant(3 : i64) : i64
+ %c4 = llvm.mlir.constant(4 : i64) : i64
+ %c6 = llvm.mlir.constant(6 : i64) : i64
+ %len = llvm.mlir.constant(4 : i64) : i64
+
+ omp.parallel {
+ omp.single {
+ // First iterator: 2-D (4 * 6 = 24)
+ %it0 = omp.iterator(%i: i64, %j: i64) =
+ (%c1 to %c4 step %c1, %c1 to %c6 step %c1) {
+ %entry0 = omp.affinity_entry %arr, %len
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ omp.yield(%entry0 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+ // Second iterator: 1-D (3)
+ %it1 = omp.iterator(%k: i64) = (%c1 to %c3 step %c1) {
+ %entry1 = omp.affinity_entry %arr, %len
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ omp.yield(%entry1 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+ // Multiple iterators in a single affinity clause.
+ omp.task affinity(%it0: !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>,
+ %it1: !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ omp.terminator
+ }
+
+ omp.terminator
+ }
+ omp.terminator
+ }
+
+ llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_multiple
+// CHECK-DAG: [[AFFLIST0:%.*]] = alloca { i64, i64, i32 }, i64 24, align 8
+// CHECK-DAG: [[AFFLIST1:%.*]] = alloca { i64, i64, i32 }, i64 3, align 8
+// CHECK-DAG: [[AFFINITY_LIST:%.*]] = alloca { i64, i64, i32 }, i32 27, align 8
+
+// First iterator header
+// CHECK: omp_iterator.preheader:
+// CHECK: br label %[[HEADER0:.+]]
+// CHECK: [[HEADER0]]:
+// CHECK: [[IV0:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT0:%.*]], %[[INC0:.+]] ]
+// CHECK: br label %[[COND0:.+]]
+// CHECK: [[COND0]]:
+// CHECK: [[CMP0:%.*]] = icmp ult i64 [[IV0]], 24
+// CHECK: br i1 [[CMP0]], label %[[BODY0:.+]], label %omp_iterator.exit
+
+// Second iterator header
+// CHECK: omp_iterator.preheader{{.*}}:
+// CHECK: [[HEADER1:.+]]:
+// CHECK: [[IV1:%.*]] = phi i64 [ 0, %omp_iterator.preheader{{.*}} ], [ [[NEXT1:%.*]], %[[INC1:.+]] ]
+// CHECK: br label %omp_iterator.cond{{.*}}
+// CHECK: omp_iterator.cond{{.*}}:
+// CHECK: [[CMP1:%.*]] = icmp ult i64 [[IV1]], 3
+// CHECK: br i1 [[CMP1]], label %[[BODY1:.+]], label %omp_iterator.exit{{.*}}
+
+// CHECK: [[AFFINITY_LIST_1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFINITY_LIST]], i64 0
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AFFINITY_LIST_1]], ptr align 1 [[AFFLIST0]], i64 480, i1 false)
+// CHECK: [[AFFINITY_LIST_2:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFINITY_LIST]], i64 24
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AFFINITY_LIST_2]], ptr align 1 [[AFFLIST1]], i64 60, i1 false)
+// CHECK: codeRepl:
+// CHECK: call ptr @__kmpc_omp_task_alloc
+// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 27{{.*}}ptr [[AFFINITY_LIST]]
+// CHECK: call i32 @__kmpc_omp_task
+
+// Second iterator body
+// CHECK: [[BODY1]]:
+// CHECK: [[ENTRY1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST1]]
+// CHECK: [[ADDR1:%.*]] = ptrtoint ptr %loadgep_ to i64
+// CHECK: [[ADDRGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 0
+// CHECK: store i64 [[ADDR1]], ptr [[ADDRGEP1]]
+// CHECK: [[LENGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP1]]
+// CHECK: [[FLAGGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP1]]
+// CHECK: br label %[[INC1]]
+// CHECK: [[INC1]]:
+// CHECK: [[NEXT1]] = add nuw i64 [[IV1]], 1
+// CHECK: br label %[[HEADER1]]
+
+// First iterator body
+// CHECK: [[BODY0]]:
+// CHECK: [[ENTRY0:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST0]], i64 [[IV0]]
+// CHECK: [[ADDR0:%.*]] = ptrtoint ptr %loadgep_ to i64
+// CHECK: [[ADDRGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 0
+// CHECK: store i64 [[ADDR0]], ptr [[ADDRGEP0]]
+// CHECK: [[LENGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP0]]
+// CHECK: [[FLAGGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP0]]
+// CHECK: br label %[[INC0]]
+// CHECK: [[INC0]]:
+// CHECK: [[NEXT0]] = add nuw i64 [[IV0]], 1
+// CHECK: br label %[[HEADER0]]
+
+// Make sure the affinity list is only allocated after the dynamic trip count has been computed
+llvm.func @task_affinity_iterator_dynamic_tripcount(
+ %arr: !llvm.ptr {llvm.nocapture}, %lb: i64, %ub: i64, %step: i64,
+ %len: i64) {
+ omp.parallel {
+ omp.single {
+ %it = omp.iterator(%i: i64) = (%lb to %ub step %step) {
+ %entry = omp.affinity_entry %arr, %len
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+ omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_dynamic_tripcount
+// CHECK: [[DIFF:%.*]] = sub i64 {{.*}}, {{.*}}
+// CHECK: [[DIV:%.*]] = sdiv i64 [[DIFF]], {{.*}}
+// CHECK: [[TRIPS:%.*]] = add i64 [[DIV]], 1
+// CHECK: [[SCALED:%.*]] = mul i64 1, [[TRIPS]]
+// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 [[SCALED]]
+
+llvm.func @task_affinity_iterator_negative_step(%arr: !llvm.ptr {llvm.nocapture}) {
+ %c4 = llvm.mlir.constant(4 : i64) : i64
+ %c1 = llvm.mlir.constant(1 : i64) : i64
+ %cn1 = llvm.mlir.constant(-1 : i64) : i64
+
+ omp.parallel {
+ omp.single {
+ %it = omp.iterator(%i: i64) = (%c4 to %c1 step %cn1) {
+ %entry = omp.affinity_entry %arr, %i
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+ omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
+ } -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
+
+ omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_iterator_negative_step
+// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 4, align 8
+// CHECK: omp_iterator.cond:
+// CHECK: [[CMP:%.*]] = icmp ult i64 %omp_iterator.iv, 4
+// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
+// CHECK: omp_iterator.body:
+// CHECK: [[IDX:%.*]] = urem i64 %omp_iterator.iv, 4
+// CHECK: [[STEPMUL:%.*]] = mul i64 [[IDX]], -1
+// CHECK: [[PHYSIV:%.*]] = add i64 4, [[STEPMUL]]
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST]], i64 %omp_iterator.iv
+// CHECK: [[LENPTR:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
+// CHECK: store i64 [[PHYSIV]], ptr [[LENPTR]]
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index fcb937dbc1867..c5cdecd091770 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3589,3 +3589,37 @@ llvm.func @nested_task_with_deps() {
// CHECK: ret void
// CHECK: }
+
+llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
+ %len = llvm.mlir.constant(4 : i64) : i64
+
+ omp.parallel {
+ omp.single {
+ %ae = omp.affinity_entry %arr, %len
+ : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
+
+ omp.task affinity(%ae : !omp.affinity_entry_ty<!llvm.ptr, i64>) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define internal void @task_affinity_plain
+// CHECK: [[BASE:%.*]] = load ptr, ptr %gep_, align 8
+// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 1, align 8
+// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST]], i64 0
+// addr
+// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr [[BASE]] to i64
+// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
+// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
+// len
+// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
+// CHECK: store i64 4, ptr [[LENGEP]]
+// flags is always 0
+// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
+// CHECK: store i32 0, ptr [[FLAGGEP]]
+// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 1, ptr [[AFFLIST]]
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 2500d546fcf4d..8fb66cb4dd0eb 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -462,15 +462,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
}
llvm.return
}
-
-// -----
-llvm.func @task_affinity(%ptr : !llvm.ptr, %len : i64) {
- // expected-error at below {{not yet implemented: omp.affinity_entry}}
- // expected-error at below {{LLVM Translation failed for operation: omp.affinity_entry}}
- %ae = omp.affinity_entry %ptr, %len
- : (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
- omp.task affinity(%ae : !omp.affinity_entry_ty<!llvm.ptr, i64>) {
- omp.terminator
- }
- llvm.return
-}
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 37836fb457537..ae2d617c3ea40 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1505,6 +1505,18 @@ kmp_int32
__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *new_task, kmp_int32 naffins,
kmp_task_affinity_info_t *affin_list) {
+ if (naffins > 0)
+ KMP_DEBUG_ASSERT(affin_list != NULL);
+
+ for (kmp_int32 i = 0; i < naffins; ++i) {
+ KA_TRACE(30, ("__kmpc_omp_reg_task_with_affinity: T#%d aff[%d] "
+ "base_addr=0x%llx len=%zu flags={%d,%d,%d}\n",
+ gtid, i, (unsigned long long)affin_list[i].base_addr,
+ affin_list[i].len, (int)affin_list[i].flags.flag1,
+ (int)affin_list[i].flags.flag2,
+ (int)affin_list[i].flags.reserved));
+ }
+
return 0;
}
More information about the Mlir-commits
mailing list