[Mlir-commits] [mlir] [OpenMP][mlir] Add Groupprivate op in omp dialect. (PR #162704)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sat Oct 25 03:02:20 PDT 2025
https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/162704
>From f6d0c6ec85fee75ce655d9e7909df2aa9123c7c7 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 26 Sep 2025 10:06:26 +0530
Subject: [PATCH 1/5] [OpenMP][mlir] Add Groupprivate op in omp dialect.
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 27 ++++++++++++++
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 37 +++++++++++++++++++
mlir/test/Dialect/OpenMP/ops.mlir | 20 ++++++++++
3 files changed, 84 insertions(+)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 377f1febf6b8f..2846df0e37980 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2224,4 +2224,31 @@ def WorkdistributeOp : OpenMP_Op<"workdistribute"> {
let assemblyFormat = "$region attr-dict";
}
+//===----------------------------------------------------------------------===//
+// [6.0] groupprivate Directive
+//===----------------------------------------------------------------------===//
+
+def GroupprivateOp : OpenMP_Op<"groupprivate",
+ [AllTypesMatch<["sym_addr", "gp_addr"]>]> {
+ let summary = "groupprivate directive";
+ let description = [{
+ The groupprivate directive specifies that variables are replicated, with
+ each group having its own copy.
+
+ This operation takes in the address of a symbol that represents the original
+ variable and returns the address of its groupprivate copy. All occurrences of
+ groupprivate variables in a parallel region should use the groupprivate copy
+ returned by this operation.
+
+ The `sym_addr` refers to the address of the symbol, which is a pointer to
+ the original variable.
+ }];
+
+ let arguments = (ins OpenMP_PointerLikeType:$sym_addr);
+ let results = (outs OpenMP_PointerLikeType:$gp_addr);
+ let assemblyFormat = [{
+ $sym_addr `:` type($sym_addr) `->` type($gp_addr) attr-dict
+ }];
+}
+
#endif // OPENMP_OPS
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f28454075f1d3..9ed0addaa31e7 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -6128,6 +6128,40 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+/// Converts an OpenMP Groupprivate operation into LLVM IR.
+static LogicalResult
+convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ auto groupprivateOp = cast<omp::GroupprivateOp>(opInst);
+
+ if (failed(checkImplementationStatus(opInst)))
+ return failure();
+
+ Value symAddr = groupprivateOp.getSymAddr();
+ auto *symOp = symAddr.getDefiningOp();
+
+ if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
+ symOp = asCast.getOperand().getDefiningOp();
+
+ if (!isa<LLVM::AddressOfOp>(symOp))
+ return opInst.emitError("Addressing symbol not found");
+ LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
+
+ LLVM::GlobalOp global =
+ addressOfOp.getGlobal(moduleTranslation.symbolTable());
+ llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
+
+ if (!ompBuilder->Config.isTargetDevice()) {
+ llvm_unreachable("NYI");
+ } else {
+ moduleTranslation.mapValue(opInst.getResult(0), globalValue);
+ }
+
+ return success();
+}
+
/// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
/// OpenMP runtime calls).
static LogicalResult
@@ -6311,6 +6345,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
.Case([&](omp::TargetFreeMemOp) {
return convertTargetFreeMemOp(*op, builder, moduleTranslation);
})
+ .Case([&](omp::GroupprivateOp) {
+ return convertOmpGroupprivate(*op, builder, moduleTranslation);
+ })
.Default([&](Operation *inst) {
return inst->emitError()
<< "not yet implemented: " << inst->getName();
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index ac29e20907b55..0d070923cb157 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3367,3 +3367,23 @@ func.func @omp_target_map_clause_type_test(%arg0 : memref<?xi32>) -> () {
return
}
+
+
+// CHECK-LABEL: func.func @omp_groupprivate
+llvm.mlir.global internal @_QFgpEx() : i32
+func.func @omp_groupprivate() {
+ %0 = arith.constant 1 : i32
+ %1 = arith.constant 2 : i32
+ // CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @_QFgpEx : !llvm.ptr
+ %global_addr = llvm.mlir.addressof @_QFgpEx : !llvm.ptr
+ omp.teams {
+ // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
+ %group_private_addr_in_teams = omp.groupprivate %global_addr : !llvm.ptr -> !llvm.ptr
+ llvm.store %0, %group_private_addr_in_teams : i32, !llvm.ptr
+ omp.terminator
+ }
+ // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
+ %group_private_addr_after_teams = omp.groupprivate %global_addr : !llvm.ptr -> !llvm.ptr
+ llvm.store %1, %group_private_addr_after_teams : i32, !llvm.ptr
+ return
+}
>From b2772c44fa24d9da47c13a5b97e89bc3c4297608 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 10 Oct 2025 22:17:49 +0530
Subject: [PATCH 2/5] update llvmir lowering and tests
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 25 ++++++++-----
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 36 +++++++++++++++++++
2 files changed, 52 insertions(+), 9 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9ed0addaa31e7..ff9d5f3ae1fe9 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -6030,7 +6030,7 @@ static bool isTargetDeviceOp(Operation *op) {
// by taking it in as an operand, so we must always lower these in
// some manner or result in an ICE (whether they end up in a no-op
// or otherwise).
- if (mlir::isa<omp::ThreadprivateOp>(op))
+ if (mlir::isa<omp::ThreadprivateOp, omp::GroupprivateOp>(op))
return true;
if (mlir::isa<omp::TargetAllocMemOp>(op) ||
@@ -6131,8 +6131,7 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
/// Converts an OpenMP Groupprivate operation into LLVM IR.
static LogicalResult
convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
- llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+ LLVM::ModuleTranslation &moduleTranslation) {
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
auto groupprivateOp = cast<omp::GroupprivateOp>(opInst);
@@ -6153,12 +6152,20 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
addressOfOp.getGlobal(moduleTranslation.symbolTable());
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
- if (!ompBuilder->Config.isTargetDevice()) {
- llvm_unreachable("NYI");
- } else {
- moduleTranslation.mapValue(opInst.getResult(0), globalValue);
- }
-
+ // Get the size of the variable
+ llvm::Type *varType = globalValue->getValueType();
+ llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+ llvm::DataLayout DL = llvmModule->getDataLayout();
+ uint64_t typeSize = DL.getTypeAllocSize(varType);
+ // Call omp_alloc_shared to allocate memory for groupprivate variable.
+ llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
+ *llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
+ // Call runtime to allocate shared memory for this group
+ llvm::Value *groupPrivatePtr =
+ builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
+ groupPrivatePtr =
+ builder.CreateBitCast(groupPrivatePtr, globalValue->getType());
+ moduleTranslation.mapValue(opInst.getResult(0), groupPrivatePtr);
return success();
}
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 8bd33a382197e..d7c43a88349e3 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3449,3 +3449,39 @@ llvm.func @nested_task_with_deps() {
// CHECK: ret void
// CHECK: }
+
+// -----
+
+// CHECK: @_QFsubEx = internal global i32 undef
+
+// CHECK-LABEL: @omp_groupprivate
+llvm.func @omp_groupprivate() {
+// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP1]], align 4
+
+// CHECK-LABEL: omp.teams.region{{.*}}
+// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 2, ptr [[TMP2]], align 4
+
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(2 : i32) : i32
+ %2 = llvm.mlir.constant(3 : i32) : i32
+
+ %3 = llvm.mlir.addressof @_QFsubEx : !llvm.ptr
+ %4 = omp.groupprivate %3 : !llvm.ptr -> !llvm.ptr
+
+ llvm.store %0, %4 : i32, !llvm.ptr
+
+ omp.teams {
+ %5 = omp.groupprivate %3 : !llvm.ptr -> !llvm.ptr
+ llvm.store %1, %5 : i32, !llvm.ptr
+ omp.terminator
+ }
+
+ llvm.store %2, %4 : i32, !llvm.ptr
+ llvm.return
+}
+
+llvm.mlir.global internal @_QFsubEx() : i32
+
+// -----
>From 33483d56cb31ad763ccb7ddfef037ac73f87648a Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 24 Oct 2025 23:23:58 +0530
Subject: [PATCH 3/5] add device_type attr to groupprivate
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 7 ++-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 61 ++++++++++++++-----
mlir/test/Dialect/OpenMP/ops.mlir | 29 +++++----
3 files changed, 69 insertions(+), 28 deletions(-)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 2846df0e37980..a768f7bb5e9f0 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2244,10 +2244,13 @@ def GroupprivateOp : OpenMP_Op<"groupprivate",
the original variable.
}];
- let arguments = (ins OpenMP_PointerLikeType:$sym_addr);
+ let arguments = (ins
+ OpenMP_PointerLikeType:$sym_addr,
+ OptionalAttr<DeclareTargetDeviceTypeAttr>:$device_type
+ );
let results = (outs OpenMP_PointerLikeType:$gp_addr);
let assemblyFormat = [{
- $sym_addr `:` type($sym_addr) `->` type($gp_addr) attr-dict
+ $sym_addr `:` type($sym_addr) ( `,` `device_type` $device_type^ )? `->` type($gp_addr) attr-dict
}];
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ff9d5f3ae1fe9..73dd1fa866980 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -6137,6 +6137,28 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(opInst)))
return failure();
+
+ bool isTargetDevice = ompBuilder->Config.isTargetDevice();
+ auto deviceType = groupprivateOp.getDeviceType();
+
+ // skip allocation based on device_type
+ bool shouldAllocate = true;
+ if (deviceType.has_value()) {
+ switch (*deviceType) {
+ case mlir::omp::DeclareTargetDeviceType::host:
+ // Only allocate on host
+ shouldAllocate = !isTargetDevice;
+ break;
+ case mlir::omp::DeclareTargetDeviceType::nohost:
+ // Only allocate on device
+ shouldAllocate = isTargetDevice;
+ break;
+ case mlir::omp::DeclareTargetDeviceType::any:
+ // Allocate on both
+ shouldAllocate = true;
+ break;
+ }
+ }
Value symAddr = groupprivateOp.getSymAddr();
auto *symOp = symAddr.getDefiningOp();
@@ -6151,21 +6173,32 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::GlobalOp global =
addressOfOp.getGlobal(moduleTranslation.symbolTable());
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
+ llvm::Value *resultPtr;
- // Get the size of the variable
- llvm::Type *varType = globalValue->getValueType();
- llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
- llvm::DataLayout DL = llvmModule->getDataLayout();
- uint64_t typeSize = DL.getTypeAllocSize(varType);
- // Call omp_alloc_shared to allocate memory for groupprivate variable.
- llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
- *llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
- // Call runtime to allocate shared memory for this group
- llvm::Value *groupPrivatePtr =
- builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
- groupPrivatePtr =
- builder.CreateBitCast(groupPrivatePtr, globalValue->getType());
- moduleTranslation.mapValue(opInst.getResult(0), groupPrivatePtr);
+ if (shouldAllocate) {
+ // Get the size of the variable
+ llvm::Type *varType = globalValue->getValueType();
+ llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+ llvm::DataLayout DL = llvmModule->getDataLayout();
+ uint64_t typeSize = DL.getTypeAllocSize(varType);
+ // Get or create the declaration of the __kmpc_alloc_shared runtime function.
+ llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
+ *llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
+ // Call runtime to allocate shared memory for this group
+ llvm::Value *groupPrivatePtr =
+ builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
+ resultPtr =
+ builder.CreateBitCast(groupPrivatePtr, globalValue->getType());
+ }
+ else {
+ // Use original global address when not allocating group-private storage
+ resultPtr = moduleTranslation.lookupValue(symAddr);
+ if (!resultPtr) {
+ // Fallback: use the translated LLVM global itself as the address
+ resultPtr = builder.CreateBitCast(globalValue, globalValue->getType());
+ }
+ }
+ moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
return success();
}
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 0d070923cb157..78f210508c6d3 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3368,22 +3368,27 @@ func.func @omp_target_map_clause_type_test(%arg0 : memref<?xi32>) -> () {
return
}
-
-// CHECK-LABEL: func.func @omp_groupprivate
-llvm.mlir.global internal @_QFgpEx() : i32
-func.func @omp_groupprivate() {
+// CHECK-LABEL: func.func @omp_groupprivate_device_type
+func.func @omp_groupprivate_device_type() {
%0 = arith.constant 1 : i32
%1 = arith.constant 2 : i32
// CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @_QFgpEx : !llvm.ptr
%global_addr = llvm.mlir.addressof @_QFgpEx : !llvm.ptr
- omp.teams {
- // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
- %group_private_addr_in_teams = omp.groupprivate %global_addr : !llvm.ptr -> !llvm.ptr
- llvm.store %0, %group_private_addr_in_teams : i32, !llvm.ptr
- omp.terminator
- }
+
// CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
- %group_private_addr_after_teams = omp.groupprivate %global_addr : !llvm.ptr -> !llvm.ptr
- llvm.store %1, %group_private_addr_after_teams : i32, !llvm.ptr
+ %group_private_addr = omp.groupprivate %global_addr : !llvm.ptr -> !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr, device_type (any) -> !llvm.ptr
+ %group_private_any = omp.groupprivate %global_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %1, %group_private_any : i32, !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr, device_type (host) -> !llvm.ptr
+ %group_private_host = omp.groupprivate %global_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+ llvm.store %1, %group_private_host : i32, !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr, device_type (nohost) -> !llvm.ptr
+ %group_private_nohost = omp.groupprivate %global_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %1, %group_private_nohost : i32, !llvm.ptr
+
return
}
>From a1b38513f92a8c652344e8ae6b65403df4137e69 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Sat, 25 Oct 2025 11:13:19 +0530
Subject: [PATCH 4/5] add llvm translation tests for groupprivate
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 12 ++--
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 70 ++++++++++++++-----
2 files changed, 56 insertions(+), 26 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 73dd1fa866980..3e9b2bd3c2d8c 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -6137,13 +6137,13 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(opInst)))
return failure();
-
+
bool isTargetDevice = ompBuilder->Config.isTargetDevice();
auto deviceType = groupprivateOp.getDeviceType();
-
+
// skip allocation based on device_type
bool shouldAllocate = true;
- if (deviceType.has_value()) {
+ if (deviceType.has_value()) {
switch (*deviceType) {
case mlir::omp::DeclareTargetDeviceType::host:
// Only allocate on host
@@ -6187,10 +6187,8 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
// Call runtime to allocate shared memory for this group
llvm::Value *groupPrivatePtr =
builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
- resultPtr =
- builder.CreateBitCast(groupPrivatePtr, globalValue->getType());
- }
- else {
+ resultPtr = builder.CreateBitCast(groupPrivatePtr, globalValue->getType());
+ } else {
// Use original global address when not allocating group-private storage
resultPtr = moduleTranslation.lookupValue(symAddr);
if (!resultPtr) {
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index d7c43a88349e3..2969676f1225d 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3452,36 +3452,68 @@ llvm.func @nested_task_with_deps() {
// -----
-// CHECK: @_QFsubEx = internal global i32 undef
+module attributes {omp.is_target_device = false} {
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
+llvm.func @omp_groupprivate_host() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.addressof @any : !llvm.ptr
+ %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %0, %2 : i32, !llvm.ptr
+
+ %3 = llvm.mlir.addressof @host : !llvm.ptr
+ %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+ llvm.store %0, %4 : i32, !llvm.ptr
+
+ %5 = llvm.mlir.addressof @nohost : !llvm.ptr
+ %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %0, %6 : i32, !llvm.ptr
+ llvm.return
+}
+}
-// CHECK-LABEL: @omp_groupprivate
-llvm.func @omp_groupprivate() {
+// CHECK: @any = internal global i32 undef
+// CHECK: @host = internal global i32 undef
+// CHECK: @nohost = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_host
// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
// CHECK: store i32 1, ptr [[TMP1]], align 4
-
-// CHECK-LABEL: omp.teams.region{{.*}}
// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: store i32 2, ptr [[TMP2]], align 4
+// CHECK: store i32 1, ptr [[TMP2]], align 4
+// CHECK: store i32 1, ptr @nohost, align 4
- %0 = llvm.mlir.constant(1 : i32) : i32
- %1 = llvm.mlir.constant(2 : i32) : i32
- %2 = llvm.mlir.constant(3 : i32) : i32
+// -----
- %3 = llvm.mlir.addressof @_QFsubEx : !llvm.ptr
- %4 = omp.groupprivate %3 : !llvm.ptr -> !llvm.ptr
+module attributes {omp.is_target_device = true} {
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
+llvm.func @omp_groupprivate_device() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.addressof @any : !llvm.ptr
+ %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %0, %2 : i32, !llvm.ptr
+ %3 = llvm.mlir.addressof @host : !llvm.ptr
+ %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
llvm.store %0, %4 : i32, !llvm.ptr
- omp.teams {
- %5 = omp.groupprivate %3 : !llvm.ptr -> !llvm.ptr
- llvm.store %1, %5 : i32, !llvm.ptr
- omp.terminator
- }
-
- llvm.store %2, %4 : i32, !llvm.ptr
+ %5 = llvm.mlir.addressof @nohost : !llvm.ptr
+ %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %0, %6 : i32, !llvm.ptr
llvm.return
}
+}
-llvm.mlir.global internal @_QFsubEx() : i32
+// CHECK: @any = internal global i32 undef
+// CHECK: @host = internal global i32 undef
+// CHECK: @nohost = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_device
+// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP1]], align 4
+// CHECK: store i32 1, ptr @host, align 4
+// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP2]], align 4
// -----
>From 82f794bf31a0edec5683b9bfb57e0d48d772b6cf Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Sat, 25 Oct 2025 15:30:46 +0530
Subject: [PATCH 5/5] add test for target groupprivate usage
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 6 +--
mlir/test/Dialect/OpenMP/ops.mlir | 2 +
.../Target/LLVMIR/omptarget-groupprivate.mlir | 41 +++++++++++++++++++
3 files changed, 46 insertions(+), 3 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index a768f7bb5e9f0..6922b29115078 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2236,9 +2236,9 @@ def GroupprivateOp : OpenMP_Op<"groupprivate",
each group having its own copy.
This operation takes in the address of a symbol that represents the original
- variable and returns the address of its groupprivate copy. All occurrences of
- groupprivate variables in a parallel region should use the groupprivate copy
- returned by this operation.
+ variable, plus an optional `device_type` attribute, and returns the address of
+ its groupprivate copy. All occurrences of groupprivate variables in a parallel
+ region should use the groupprivate copy returned by this operation.
The `sym_addr` refers to the address of the symbol, which is a pointer to
the original variable.
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 78f210508c6d3..637dbef20303b 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3392,3 +3392,5 @@ func.func @omp_groupprivate_device_type() {
return
}
+
+llvm.mlir.global internal @_QFgpEx() : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
new file mode 100644
index 0000000000000..46e9639adcc06
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa",
+ dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>} {
+ llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
+
+ %ga = llvm.mlir.addressof @global_a : !llvm.ptr
+ %map_a = omp.map.info var_ptr(%ga : !llvm.ptr, i32) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr {name = "i"}
+ omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
+ %loaded = llvm.load %arg1 : !llvm.ptr -> i32
+
+ %any_addr = llvm.mlir.addressof @global_any : !llvm.ptr
+ %any_gp = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %loaded, %any_gp : i32, !llvm.ptr
+
+ %host_addr = llvm.mlir.addressof @global_host : !llvm.ptr
+ %host_gp = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+ llvm.store %loaded, %host_gp : i32, !llvm.ptr
+
+ %nohost_addr = llvm.mlir.addressof @global_nohost : !llvm.ptr
+ %nohost_gp = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %loaded, %nohost_gp : i32, !llvm.ptr
+
+ omp.terminator
+ }
+ llvm.return
+ }
+ llvm.mlir.global internal @global_a() : i32
+ llvm.mlir.global internal @global_any() : i32
+ llvm.mlir.global internal @global_host() : i32
+ llvm.mlir.global internal @global_nohost() : i32
+}
+
+// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
+// CHECK-LABEL: omp.target:
+// CHECK-NEXT: %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
+// CHECK-NEXT: %[[ALLOC_any:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK-NEXT: store i32 %[[LOAD]], ptr %[[ALLOC_any]], align 4
+// CHECK-NEXT: store i32 %[[LOAD]], ptr @global_host, align 4
+// CHECK-NEXT: %[[ALLOC_NOHOST:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK-NEXT: store i32 %[[LOAD]], ptr %[[ALLOC_NOHOST]], align 4
More information about the Mlir-commits
mailing list