[Mlir-commits] [mlir] 7ff87af - [MLIR][OpenMP] Host lowering of standalone distribute (#127817)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Feb 25 02:28:48 PST 2025
Author: Sergio Afonso
Date: 2025-02-25T10:28:44Z
New Revision: 7ff87af533a7acf47134eabe656702180d8ad171
URL: https://github.com/llvm/llvm-project/commit/7ff87af533a7acf47134eabe656702180d8ad171
DIFF: https://github.com/llvm/llvm-project/commit/7ff87af533a7acf47134eabe656702180d8ad171.diff
LOG: [MLIR][OpenMP] Host lowering of standalone distribute (#127817)
This patch adds MLIR to LLVM IR translation support for standalone
`omp.distribute` operations, as well as `distribute simd` through
ignoring SIMD information (similarly to `do/for simd`).
Co-authored-by: Dominik Adamski <dominik.adamski at amd.com>
Added:
Modified:
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
mlir/test/Target/LLVMIR/openmp-llvm.mlir
mlir/test/Target/LLVMIR/openmp-todo.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6883d78cd317d..090a5a9836b79 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -161,6 +161,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (op.getDevice())
result = todo("device");
};
+ auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+ if (op.getDistScheduleChunkSize())
+ result = todo("dist_schedule with chunk_size");
+ };
auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
if (!op.getHasDeviceAddrVars().empty())
result = todo("has_device_addr");
@@ -252,6 +256,16 @@ static LogicalResult checkImplementationStatus(Operation &op) {
LogicalResult result = success();
llvm::TypeSwitch<Operation &>(op)
+ .Case([&](omp::DistributeOp op) {
+ if (op.isComposite() &&
+ isa_and_present<omp::WsloopOp>(op.getNestedWrapper()))
+ result = op.emitError() << "not yet implemented: "
+ "composite omp.distribute + omp.wsloop";
+ checkAllocate(op, result);
+ checkDistSchedule(op, result);
+ checkOrder(op, result);
+ checkPrivate(op, result);
+ })
.Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
.Case([&](omp::SectionsOp op) {
checkAllocate(op, result);
@@ -3854,6 +3868,72 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
return success();
}
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ auto distributeOp = cast<omp::DistributeOp>(opInst);
+ if (failed(checkImplementationStatus(opInst)))
+ return failure();
+
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ auto bodyGenCB = [&](InsertPointTy allocaIP,
+ InsertPointTy codeGenIP) -> llvm::Error {
+ // Save the alloca insertion point on ModuleTranslation stack for use in
+ // nested regions.
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+ moduleTranslation, allocaIP);
+
+ // DistributeOp has only one region associated with it.
+ builder.restoreIP(codeGenIP);
+
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::Expected<llvm::BasicBlock *> regionBlock =
+ convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+ builder, moduleTranslation);
+ if (!regionBlock)
+ return regionBlock.takeError();
+ builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+ // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+ // Static schedule is the default.
+ auto schedule = omp::ClauseScheduleKind::Static;
+ bool isOrdered = false;
+ std::optional<omp::ScheduleModifier> scheduleMod;
+ bool isSimd = false;
+ llvm::omp::WorksharingLoopType workshareLoopType =
+ llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+ bool loopNeedsBarrier = false;
+ llvm::Value *chunk = nullptr;
+
+ llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+ ompBuilder->applyWorkshareLoop(
+ ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+ convertToScheduleKind(schedule), chunk, isSimd,
+ scheduleMod == omp::ScheduleModifier::monotonic,
+ scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+ workshareLoopType);
+
+ if (!wsloopIP)
+ return wsloopIP.takeError();
+ return llvm::Error::success();
+ };
+
+ llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+ findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+ ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+
+ if (failed(handleError(afterIP, opInst)))
+ return failure();
+
+ builder.restoreIP(*afterIP);
+ return success();
+}
+
/// Lowers the FlagsAttr which is applied to the module on the device
/// pass when offloading, this attribute contains OpenMP RTL globals that can
/// be passed as flags to the frontend, otherwise they are set to default
@@ -4813,6 +4893,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
.Case([&](omp::TargetOp) {
return convertOmpTarget(*op, builder, moduleTranslation);
})
+ .Case([&](omp::DistributeOp) {
+ return convertOmpDistribute(*op, builder, moduleTranslation);
+ })
.Case([&](omp::LoopNestOp) {
return convertOmpLoopNest(*op, builder, moduleTranslation);
})
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index cf18c07dd605b..a5a490e527d79 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3270,3 +3270,40 @@ llvm.func @omp_task_if(%boolexpr: i1) {
// -----
module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}
+
+// -----
+
+llvm.func @distribute() {
+ %0 = llvm.mlir.constant(42 : index) : i64
+ %1 = llvm.mlir.constant(10 : index) : i64
+ %2 = llvm.mlir.constant(1 : index) : i64
+ omp.distribute {
+ omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define void @distribute
+// CHECK: call void @[[OUTLINED:.*]]({{.*}})
+// CHECK-NEXT: br label %[[EXIT:.*]]
+// CHECK: [[EXIT]]:
+// CHECK: ret void
+
+// CHECK: define internal void @[[OUTLINED]]({{.*}})
+// CHECK: %[[LASTITER:.*]] = alloca i32
+// CHECK: %[[LB:.*]] = alloca i64
+// CHECK: %[[UB:.*]] = alloca i64
+// CHECK: %[[STRIDE:.*]] = alloca i64
+// CHECK: br label %[[BODY:.*]]
+// CHECK: [[BODY]]:
+// CHECK-NEXT: br label %[[REGION:.*]]
+// CHECK: [[REGION]]:
+// CHECK-NEXT: br label %[[PREHEADER:.*]]
+// CHECK: [[PREHEADER]]:
+// CHECK: store i64 0, ptr %[[LB]]
+// CHECK: store i64 31, ptr %[[UB]]
+// CHECK: store i64 1, ptr %[[STRIDE]]
+// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
+// CHECK: call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0)
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index e97b5e54e6415..71dbc061c3104 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -66,10 +66,70 @@ llvm.func @do_simd(%lb : i32, %ub : i32, %step : i32) {
// -----
-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) {
- // expected-error@below {{not yet implemented: omp.distribute}}
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+ // expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
+ omp.parallel {
+ // expected-error@below {{not yet implemented: composite omp.distribute + omp.wsloop}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute {
+ omp.wsloop {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ } {omp.composite}
+ } {omp.composite}
+ omp.terminator
+ } {omp.composite}
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) {
+ // expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
+ // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}}
+ // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+ omp.distribute order(concurrent) {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// -----
+
+omp.private {type = private} @x.privatizer : !llvm.ptr
+
+llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+ // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
// expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
- omp.distribute {
+ omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
omp.yield
}
More information about the Mlir-commits
mailing list