[Mlir-commits] [mlir] [mlir][OpenMP] - MLIR to LLVMIR translation support for delayed privatization in `omp.target` ops. (PR #109668)
Pranav Bhandarkar
llvmlistbot at llvm.org
Mon Sep 30 09:48:54 PDT 2024
https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/109668
>From 88105963cac19ad87f08e2a2605003062699d8a1 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 20 Sep 2024 16:15:34 -0500
Subject: [PATCH 1/5] [mlir][OpenMP] - Implement lowering from MLIR to LLVMIR
for private clause on target constructs
This patch adds support to translate the `private` clause on `omp.target`
ops from MLIR to LLVMIR. This first cut only handles non-allocatable variables.
Also, this is for delayed privatization.
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 110 ++++++++++++++----
.../Target/LLVMIR/openmp-target-private.mlir | 71 +++++++++++
2 files changed, 158 insertions(+), 23 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-target-private.mlir
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0cba8d80681f13..b62ce167276f41 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1356,6 +1356,38 @@ class OmpParallelOpConversionManager {
unsigned privateArgEndIdx;
};
+namespace {
+omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName) {
+ omp::PrivateClauseOp privatizer =
+ SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
+ symbolName);
+ assert(privatizer && "privatizer not found in the symbol table");
+ return privatizer;
+}
+omp::PrivateClauseOp clonePrivatizer(LLVM::ModuleTranslation &moduleTranslation,
+ omp::PrivateClauseOp privatizer,
+ Operation *fromOperation) {
+ MLIRContext &context = moduleTranslation.getContext();
+ mlir::IRRewriter opCloner(&context);
+ opCloner.setInsertionPoint(privatizer);
+ auto clone =
+ llvm::cast<mlir::omp::PrivateClauseOp>(opCloner.clone(*privatizer));
+
+ // Unique the clone name to avoid clashes in the symbol table.
+ unsigned counter = 0;
+ SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
+ privatizer.getSymName(),
+ [&](llvm::StringRef candidate) {
+ return SymbolTable::lookupNearestSymbolFrom(
+ fromOperation, StringAttr::get(&context, candidate)) !=
+ nullptr;
+ },
+ counter);
+
+ clone.setSymName(cloneName);
+ return clone;
+}
+} // namespace
/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
@@ -1611,34 +1643,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
continue;
SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(mlirPrivatizerAttr);
- omp::PrivateClauseOp privatizer =
- SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
- opInst, privSym);
+ omp::PrivateClauseOp privatizer = findPrivatizer(opInst, privSym);
// Clone the privatizer in case it is used by more than one parallel
// region. The privatizer is processed in-place (see below) before it
// gets inlined in the parallel region and therefore processing the
// original op is dangerous.
-
- MLIRContext &context = moduleTranslation.getContext();
- mlir::IRRewriter opCloner(&context);
- opCloner.setInsertionPoint(privatizer);
- auto clone = llvm::cast<mlir::omp::PrivateClauseOp>(
- opCloner.clone(*privatizer));
-
- // Unique the clone name to avoid clashes in the symbol table.
- unsigned counter = 0;
- SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
- privatizer.getSymName(),
- [&](llvm::StringRef candidate) {
- return SymbolTable::lookupNearestSymbolFrom(
- opInst, StringAttr::get(&context, candidate)) !=
- nullptr;
- },
- counter);
-
- clone.setSymName(cloneName);
- return {mlirPrivVar, clone};
+ return {mlirPrivVar,
+ clonePrivatizer(moduleTranslation, privatizer, opInst)};
}
}
@@ -3435,6 +3447,58 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
const auto &arg = targetRegion.front().getArgument(argIndex);
moduleTranslation.mapValue(arg, mapOpValue);
}
+
+ // Do privatization after moduleTranslation has already recorded
+ // mapped values.
+ if (!targetOp.getPrivateVars().empty()) {
+ auto oldIP = builder.saveIP();
+ builder.restoreIP(allocaIP);
+
+ OperandRange privateVars = targetOp.getPrivateVars();
+ std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
+ unsigned numMapVars = targetOp.getMapVars().size();
+ Block &firstTargetBlock = targetRegion.front();
+ auto *blockArgsStart = firstTargetBlock.getArguments().begin();
+ auto *privArgsStart = blockArgsStart + numMapVars;
+ auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
+ MutableArrayRef privateBlockArgs(privArgsStart, privArgsEnd);
+
+ for (auto [privVar, privatizerNameAttr, privBlockArg] :
+ llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) {
+
+ SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
+ omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
+ if (privatizer.getDataSharingType() ==
+ omp::DataSharingClauseType::FirstPrivate ||
+ !privatizer.getDeallocRegion().empty()) {
+ opInst.emitError("Translation of omp.target from MLIR to LLVMIR "
+ "failed because translation of firstprivate and "
+ " private allocatables is not supported yet");
+ bodyGenStatus = failure();
+ } else {
+ omp::PrivateClauseOp clonedPrivatizer =
+ clonePrivatizer(moduleTranslation, privatizer, &opInst);
+ Region &allocRegion = clonedPrivatizer.getAllocRegion();
+ BlockArgument allocRegionArg = allocRegion.getArgument(0);
+ replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
+ SmallVector<llvm::Value *, 1> yieldedValues;
+ if (failed(inlineConvertOmpRegions(
+ allocRegion, "omp.targetop.privatizer", builder,
+ moduleTranslation, &yieldedValues))) {
+ opInst.emitError(
+ "failed to inline `alloc` region of an `omp.private` "
+ "op in the target region");
+ bodyGenStatus = failure();
+ } else {
+ assert(yieldedValues.size() == 1);
+ moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
+ }
+ clonedPrivatizer.erase();
+ builder.restoreIP(oldIP);
+ }
+ }
+ }
+ builder.restoreIP(codeGenIP);
llvm::BasicBlock *exitBlock = convertOmpOpRegions(
targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
builder.SetInsertPoint(exitBlock);
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
new file mode 100644
index 00000000000000..e3b024e6a4d51e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -0,0 +1,71 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} {
+%0 = llvm.mlir.constant(1 : i64) : i64
+%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+%2 = llvm.mlir.constant(1 : i64) : i64
+%3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+%4 = llvm.mlir.constant(2 : i32) : i32
+llvm.store %4, %3 : i32, !llvm.ptr
+%5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+%6 = llvm.mlir.constant(10 : i32) : i32
+%7 = llvm.load %arg0 : !llvm.ptr -> i32
+%8 = llvm.add %7, %6 : i32
+llvm.store %8, %arg1 : i32, !llvm.ptr
+omp.terminator
+}
+llvm.return
+}
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK-NOT: define {{.*}}
+// CHECK: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
+// CHECK: %[[ADD:.*]] = add i32 {{.*}}, 10
+// CHECK: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4
+
+omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+%0 = llvm.mlir.constant(1 : i64) : i64
+%1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
+omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
+%0 = llvm.mlir.constant(1 : i64) : i64
+%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+%3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
+%5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+%6 = llvm.mlir.constant(2 : i32) : i32
+llvm.store %6, %5 : i32, !llvm.ptr
+%7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+%8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
+%9 = llvm.mlir.constant(10 : i32) : i32
+%10 = llvm.load %arg0 : !llvm.ptr -> i32
+%11 = llvm.add %10, %9 : i32
+llvm.store %11, %arg1 : i32, !llvm.ptr
+%12 = llvm.load %arg1 : !llvm.ptr -> i32
+%13 = llvm.sitofp %12 : i32 to f32
+%14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32
+llvm.store %14, %arg2 : f32, !llvm.ptr
+omp.terminator
+}
+llvm.return
+}
+
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
+// CHECK: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
+// CHECK: %[[ADD_I32:.*]] = add i32 {{.*}}, 10
+// CHECK: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4
+// CHECK: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %[[PRIV_I32_ALLOC]], align 4
+// CHECK: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float
+// CHECK: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01
+// CHECK: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4
>From c690d86ca8c0a8f0e7c7fbf2886e7952ea45c728 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 24 Sep 2024 16:42:25 -0500
Subject: [PATCH 2/5] Address review comments
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 22 +++--
.../Target/LLVMIR/openmp-target-private.mlir | 85 +++++++++----------
2 files changed, 55 insertions(+), 52 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index b62ce167276f41..513d80188a1fc0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1356,17 +1356,21 @@ class OmpParallelOpConversionManager {
unsigned privateArgEndIdx;
};
-namespace {
-omp::PrivateClauseOp findPrivatizer(Operation *from, SymbolRefAttr symbolName) {
+// Looks up, starting from the operation `from`, the PrivateClauseOp named
+// `symbolName` and returns it.
+static omp::PrivateClauseOp findPrivatizer(Operation *from,
+ SymbolRefAttr symbolName) {
omp::PrivateClauseOp privatizer =
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
symbolName);
assert(privatizer && "privatizer not found in the symbol table");
return privatizer;
}
-omp::PrivateClauseOp clonePrivatizer(LLVM::ModuleTranslation &moduleTranslation,
- omp::PrivateClauseOp privatizer,
- Operation *fromOperation) {
+// Clones the given privatizer. The original privatizer is used as
+// the insertion point for the clone.
+static omp::PrivateClauseOp
+clonePrivatizer(LLVM::ModuleTranslation &moduleTranslation,
+ omp::PrivateClauseOp privatizer, Operation *fromOperation) {
MLIRContext &context = moduleTranslation.getContext();
mlir::IRRewriter opCloner(&context);
opCloner.setInsertionPoint(privatizer);
@@ -1387,7 +1391,6 @@ omp::PrivateClauseOp clonePrivatizer(LLVM::ModuleTranslation &moduleTranslation,
clone.setSymName(cloneName);
return clone;
}
-} // namespace
/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
@@ -3458,9 +3461,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
unsigned numMapVars = targetOp.getMapVars().size();
Block &firstTargetBlock = targetRegion.front();
- auto *blockArgsStart = firstTargetBlock.getArguments().begin();
- auto *privArgsStart = blockArgsStart + numMapVars;
- auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
+ BlockArgument *blockArgsStart = firstTargetBlock.getArguments().begin();
+ BlockArgument *privArgsStart = blockArgsStart + numMapVars;
+ BlockArgument *privArgsEnd =
+ privArgsStart + targetOp.getPrivateVars().size();
MutableArrayRef privateBlockArgs(privArgsStart, privArgsEnd);
for (auto [privVar, privatizerNameAttr, privBlockArg] :
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
index e3b024e6a4d51e..8d5bac42852bb0 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -2,27 +2,26 @@
omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
- %0 = llvm.mlir.constant(1 : i64) : i64
- %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
- omp.yield(%1 : !llvm.ptr)
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
}
llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} {
-%0 = llvm.mlir.constant(1 : i64) : i64
-%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
-%2 = llvm.mlir.constant(1 : i64) : i64
-%3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
-%4 = llvm.mlir.constant(2 : i32) : i32
-llvm.store %4, %3 : i32, !llvm.ptr
-%5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
-omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
-^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
-%6 = llvm.mlir.constant(10 : i32) : i32
-%7 = llvm.load %arg0 : !llvm.ptr -> i32
-%8 = llvm.add %7, %6 : i32
-llvm.store %8, %arg1 : i32, !llvm.ptr
-omp.terminator
-}
-llvm.return
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(2 : i32) : i32
+ llvm.store %4, %3 : i32, !llvm.ptr
+ %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %6 = llvm.mlir.constant(10 : i32) : i32
+ %7 = llvm.load %arg0 : !llvm.ptr -> i32
+ %8 = llvm.add %7, %6 : i32
+ llvm.store %8, %arg1 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
}
// CHECK: define internal void @__omp_offloading_fd00
// CHECK-NOT: define {{.*}}
@@ -32,32 +31,32 @@ llvm.return
omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
-%0 = llvm.mlir.constant(1 : i64) : i64
-%1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
-omp.yield(%1 : !llvm.ptr)
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
}
llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
-%0 = llvm.mlir.constant(1 : i64) : i64
-%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
-%3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
-%5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
-%6 = llvm.mlir.constant(2 : i32) : i32
-llvm.store %6, %5 : i32, !llvm.ptr
-%7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
-omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
-^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
-%8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
-%9 = llvm.mlir.constant(10 : i32) : i32
-%10 = llvm.load %arg0 : !llvm.ptr -> i32
-%11 = llvm.add %10, %9 : i32
-llvm.store %11, %arg1 : i32, !llvm.ptr
-%12 = llvm.load %arg1 : !llvm.ptr -> i32
-%13 = llvm.sitofp %12 : i32 to f32
-%14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32
-llvm.store %14, %arg2 : f32, !llvm.ptr
-omp.terminator
-}
-llvm.return
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
+ %5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %6 = llvm.mlir.constant(2 : i32) : i32
+ llvm.store %6, %5 : i32, !llvm.ptr
+ %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+ %8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
+ %9 = llvm.mlir.constant(10 : i32) : i32
+ %10 = llvm.load %arg0 : !llvm.ptr -> i32
+ %11 = llvm.add %10, %9 : i32
+ llvm.store %11, %arg1 : i32, !llvm.ptr
+ %12 = llvm.load %arg1 : !llvm.ptr -> i32
+ %13 = llvm.sitofp %12 : i32 to f32
+ %14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32
+ llvm.store %14, %arg2 : f32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
}
// CHECK: define internal void @__omp_offloading_fd00
>From ee20ced8176dc82c839ab3da4f1854441d6cb653 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 25 Sep 2024 17:13:14 -0500
Subject: [PATCH 3/5] Address review comments
- Add an artificial multi-block alloc region privatizer test in
mlir/test/Target/LLVMIR/openmp-target-private.mlir
- Get rid of the need to clone the privatizer by mapping the
block argument of the privatizer's alloc region to the llvm value
corresponding to the host-associated source variable being privatized instead
of replacing all uses of the block argument with the mlir value
of the host-associated source variable. This obviates the need to
clone the privatizer.
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 17 ++++++-----
.../Target/LLVMIR/openmp-target-private.mlir | 29 +++++++++++++++++++
2 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 513d80188a1fc0..e9fbc325b06e74 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3454,7 +3454,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
// Do privatization after moduleTranslation has already recorded
// mapped values.
if (!targetOp.getPrivateVars().empty()) {
- auto oldIP = builder.saveIP();
builder.restoreIP(allocaIP);
OperandRange privateVars = targetOp.getPrivateVars();
@@ -3480,11 +3479,11 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
" private allocatables is not supported yet");
bodyGenStatus = failure();
} else {
- omp::PrivateClauseOp clonedPrivatizer =
- clonePrivatizer(moduleTranslation, privatizer, &opInst);
- Region &allocRegion = clonedPrivatizer.getAllocRegion();
+ llvm::errs() << "here\n";
+ Region &allocRegion = privatizer.getAllocRegion();
BlockArgument allocRegionArg = allocRegion.getArgument(0);
- replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
+ moduleTranslation.mapValue(allocRegionArg,
+ moduleTranslation.lookupValue(privVar));
SmallVector<llvm::Value *, 1> yieldedValues;
if (failed(inlineConvertOmpRegions(
allocRegion, "omp.targetop.privatizer", builder,
@@ -3494,17 +3493,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
"op in the target region");
bodyGenStatus = failure();
} else {
+ builder.GetInsertBlock()->getParent()->getParent()->dump();
assert(yieldedValues.size() == 1);
moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
}
- clonedPrivatizer.erase();
- builder.restoreIP(oldIP);
+ moduleTranslation.forgetMapping(allocRegion);
+ builder.restoreIP(builder.saveIP());
}
}
}
- builder.restoreIP(codeGenIP);
+ llvm::errs() << builder.GetInsertBlock()->getName().str() << "\n";
llvm::BasicBlock *exitBlock = convertOmpOpRegions(
targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
+ builder.GetInsertBlock()->getParent()->dump();
builder.SetInsertPoint(exitBlock);
return builder.saveIP();
};
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
index 8d5bac42852bb0..4ce10050657937 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -59,6 +59,7 @@ llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_ma
llvm.return
}
+
// CHECK: define internal void @__omp_offloading_fd00
// CHECK: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
@@ -68,3 +69,31 @@ llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_ma
// CHECK: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float
// CHECK: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01
// CHECK: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4
+
+// An entirely artificial privatizer that is meant to check multi-block
+// privatizers. The idea here is to prove that we set the correct
+// insertion points for the builder when generating, first, LLVM IR for the
+// privatizer and then for the actual target region.
+omp.private {type = private} @multi_block.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ llvm.br ^bb1(%c1 : i32)
+
+^bb1(%arg1: i32):
+ %0 = llvm.alloca %arg1 x f32 : (i32) -> !llvm.ptr
+ omp.yield(%0 : !llvm.ptr)
+}
+
+llvm.func @target_op_private_multi_block(%arg0: !llvm.ptr) {
+ omp.target private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
+ ^bb0(%arg2: !llvm.ptr):
+ %0 = llvm.load %arg2 : !llvm.ptr -> f32
+ omp.terminator
+ }
+ llvm.return
+}
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK: %[[ONE:.*]] = phi i32 [ 1, {{.*}} ]
+// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[ONE]], align 4
+// CHECK: %[[PHI_ALLOCA:.*]] = phi ptr [ %[[PRIV_ALLOC]], {{.*}} ]
+// CHECK: %[[RESULT:.*]] = load float, ptr %[[PHI_ALLOCA]], align 4
>From 3f4ddcd4e171538e4d9b245b9f81f264b9461ce3 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 25 Sep 2024 17:20:50 -0500
Subject: [PATCH 4/5] Remove some debug prints
---
.../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index e9fbc325b06e74..c2069cd61bae41 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3479,7 +3479,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
" private allocatables is not supported yet");
bodyGenStatus = failure();
} else {
- llvm::errs() << "here\n";
Region &allocRegion = privatizer.getAllocRegion();
BlockArgument allocRegionArg = allocRegion.getArgument(0);
moduleTranslation.mapValue(allocRegionArg,
@@ -3493,7 +3492,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
"op in the target region");
bodyGenStatus = failure();
} else {
- builder.GetInsertBlock()->getParent()->getParent()->dump();
assert(yieldedValues.size() == 1);
moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
}
@@ -3502,10 +3500,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
}
}
}
- llvm::errs() << builder.GetInsertBlock()->getName().str() << "\n";
llvm::BasicBlock *exitBlock = convertOmpOpRegions(
targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
- builder.GetInsertBlock()->getParent()->dump();
builder.SetInsertPoint(exitBlock);
return builder.saveIP();
};
>From da04f27c815e716c17868db72a62a5b02d66ade1 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 30 Sep 2024 11:48:24 -0500
Subject: [PATCH 5/5] Make check for offloaded function name portable across
user platforms
---
mlir/test/Target/LLVMIR/openmp-target-private.mlir | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
index 4ce10050657937..6480d4e2bff0b1 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -23,7 +23,7 @@ llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarge
}
llvm.return
}
-// CHECK: define internal void @__omp_offloading_fd00
+// CHECK: define internal void @__omp_offloading_
// CHECK-NOT: define {{.*}}
// CHECK: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[ADD:.*]] = add i32 {{.*}}, 10
@@ -60,7 +60,7 @@ llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_ma
}
-// CHECK: define internal void @__omp_offloading_fd00
+// CHECK: define internal void @__omp_offloading_
// CHECK: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
// CHECK: %[[ADD_I32:.*]] = add i32 {{.*}}, 10
@@ -92,7 +92,7 @@ llvm.func @target_op_private_multi_block(%arg0: !llvm.ptr) {
}
llvm.return
}
-// CHECK: define internal void @__omp_offloading_fd00
+// CHECK: define internal void @__omp_offloading_
// CHECK: %[[ONE:.*]] = phi i32 [ 1, {{.*}} ]
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[ONE]], align 4
// CHECK: %[[PHI_ALLOCA:.*]] = phi ptr [ %[[PRIV_ALLOC]], {{.*}} ]
More information about the Mlir-commits
mailing list