[Mlir-commits] [mlir] [OpenMP][mlir] Add Groupprivate op in omp dialect. (PR #162704)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Apr 14 03:54:37 PDT 2026
https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/162704
>From 8b8fb9b4202b6f7c8b9d0674c720ffebb6784f64 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 26 Sep 2025 10:06:26 +0530
Subject: [PATCH 1/9] [OpenMP][mlir] Add Groupprivate op in omp dialect.
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 30 ++++++
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 71 ++++++++++++
mlir/test/Dialect/OpenMP/ops.mlir | 36 +++++++
.../Target/LLVMIR/omptarget-groupprivate.mlir | 41 +++++++
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 101 ++++++++++++++++++
5 files changed, 279 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 88c8ab4f6f949..a53134cb49ee2 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2353,4 +2353,34 @@ def IteratorOp : OpenMP_Op<"iterator",
let hasVerifier = 1;
}
+//===----------------------------------------------------------------------===//
+// [6.0] groupprivate Directive
+//===----------------------------------------------------------------------===//
+
+def GroupprivateOp : OpenMP_Op<"groupprivate",
+ [AllTypesMatch<["sym_addr", "gp_addr"]>]> {
+ let summary = "groupprivate directive";
+ let description = [{
+ The groupprivate directive specifies that variables are replicated, with
+ each group having its own copy.
+
+ This operation takes in the address of a symbol that represents the original
+ variable, optional DeviceTypeAttr and returns the address of its groupprivate copy.
+ All occurrences of groupprivate variables in a parallel region should
+ use the groupprivate copy returned by this operation.
+
+ The `sym_addr` refers to the address of the symbol, which is a pointer to
+ the original variable.
+ }];
+
+ let arguments = (ins
+ OpenMP_PointerLikeType:$sym_addr,
+ OptionalAttr<DeclareTargetDeviceTypeAttr>:$device_type
+ );
+ let results = (outs OpenMP_PointerLikeType:$gp_addr);
+ let assemblyFormat = [{
+ $sym_addr `:` type($sym_addr) ( `,` `device_type` $device_type^ )? `->` type($gp_addr) attr-dict
+ }];
+}
+
#endif // OPENMP_OPS
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 37b1a37c2e1a5..f70c86d530980 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7465,6 +7465,74 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+/// Converts an OpenMP Groupprivate operation into LLVM IR.
+static LogicalResult
+convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ auto groupprivateOp = cast<omp::GroupprivateOp>(opInst);
+
+ if (failed(checkImplementationStatus(opInst)))
+ return failure();
+
+ bool isTargetDevice = ompBuilder->Config.isTargetDevice();
+ auto deviceType = groupprivateOp.getDeviceType();
+
+ // skip allocation based on device_type
+ bool shouldAllocate = true;
+ if (deviceType.has_value()) {
+ switch (*deviceType) {
+ case mlir::omp::DeclareTargetDeviceType::host:
+ // Only allocate on host
+ shouldAllocate = !isTargetDevice;
+ break;
+ case mlir::omp::DeclareTargetDeviceType::nohost:
+ // Only allocate on device
+ shouldAllocate = isTargetDevice;
+ break;
+ case mlir::omp::DeclareTargetDeviceType::any:
+ // Allocate on both
+ shouldAllocate = true;
+ break;
+ }
+ }
+
+ Value symAddr = groupprivateOp.getSymAddr();
+ auto *symOp = symAddr.getDefiningOp();
+
+ if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
+ symOp = asCast.getOperand().getDefiningOp();
+
+ if (!isa<LLVM::AddressOfOp>(symOp))
+ return opInst.emitError("Addressing symbol not found");
+ LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
+
+ LLVM::GlobalOp global =
+ addressOfOp.getGlobal(moduleTranslation.symbolTable());
+ llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
+ llvm::Value *resultPtr;
+
+ if (shouldAllocate) {
+ // Get the size of the variable
+ llvm::Type *varType = globalValue->getValueType();
+ llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+ llvm::DataLayout DL = llvmModule->getDataLayout();
+ uint64_t typeSize = DL.getTypeAllocSize(varType);
+ // Call omp_alloc_shared to allocate memory for groupprivate variable.
+ llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
+ *llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
+ // Call runtime to allocate shared memory for this group
+ resultPtr = builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
+ } else {
+ // Use original global address when not allocating group-private storage
+ resultPtr = moduleTranslation.lookupValue(symAddr);
+ if (!resultPtr)
+ resultPtr = globalValue;
+ }
+ moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
+ return success();
+}
+
/// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
/// OpenMP runtime calls).
LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
@@ -7660,6 +7728,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::TargetFreeMemOp) {
return convertTargetFreeMemOp(*op, builder, moduleTranslation);
})
+ .Case([&](omp::GroupprivateOp) {
+ return convertOmpGroupprivate(*op, builder, moduleTranslation);
+ })
.Default([&](Operation *inst) {
return inst->emitError()
<< "not yet implemented: " << inst->getName();
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index ba329cc67bb14..87962e4228c24 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3672,3 +3672,39 @@ func.func @omp_task_affinity_iterator_2d(%lb0 : index, %ub0 : index, %st0 : inde
return
}
+
+// CHECK-LABEL: func.func @omp_groupprivate_device_type
+func.func @omp_groupprivate_device_type() {
+ %0 = arith.constant 1 : i32
+ %1 = arith.constant 2 : i32
+ // CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @gp : !llvm.ptr
+ %gp_addr = llvm.mlir.addressof @gp : !llvm.ptr
+ // CHECK: [[ARG1:%.*]] = llvm.mlir.addressof @any : !llvm.ptr
+ %any_addr = llvm.mlir.addressof @any : !llvm.ptr
+ // CHECK: [[ARG2:%.*]] = llvm.mlir.addressof @host : !llvm.ptr
+ %host_addr = llvm.mlir.addressof @host : !llvm.ptr
+ // CHECK: [[ARG3:%.*]] = llvm.mlir.addressof @nohost : !llvm.ptr
+ %nohost_addr = llvm.mlir.addressof @nohost : !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
+ %group_private_addr = omp.groupprivate %gp_addr : !llvm.ptr -> !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate [[ARG1]] : !llvm.ptr, device_type (any) -> !llvm.ptr
+ %group_private_any = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %1, %group_private_any : i32, !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate [[ARG2]] : !llvm.ptr, device_type (host) -> !llvm.ptr
+ %group_private_host = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+ llvm.store %1, %group_private_host : i32, !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate [[ARG3]] : !llvm.ptr, device_type (nohost) -> !llvm.ptr
+ %group_private_nohost = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %1, %group_private_nohost : i32, !llvm.ptr
+
+ return
+}
+
+llvm.mlir.global internal @gp() : i32
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
new file mode 100644
index 0000000000000..46e9639adcc06
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa",
+ dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>} {
+ llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
+
+ %ga = llvm.mlir.addressof @global_a : !llvm.ptr
+ %map_a = omp.map.info var_ptr(%ga : !llvm.ptr, i32) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr {name = "i"}
+ omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
+ %loaded = llvm.load %arg1 : !llvm.ptr -> i32
+
+ %any_addr = llvm.mlir.addressof @global_any : !llvm.ptr
+ %any_gp = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %loaded, %any_gp : i32, !llvm.ptr
+
+ %host_addr = llvm.mlir.addressof @global_host : !llvm.ptr
+ %host_gp = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+ llvm.store %loaded, %host_gp : i32, !llvm.ptr
+
+ %nohost_addr = llvm.mlir.addressof @global_nohost : !llvm.ptr
+ %nohost_gp = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %loaded, %nohost_gp : i32, !llvm.ptr
+
+ omp.terminator
+ }
+ llvm.return
+ }
+ llvm.mlir.global internal @global_a() : i32
+ llvm.mlir.global internal @global_any() : i32
+ llvm.mlir.global internal @global_host() : i32
+ llvm.mlir.global internal @global_nohost() : i32
+}
+
+// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
+// CHECK-LABEL: omp.target:
+// CHECK-NEXT : %[[LOAD:.*]] = load i32, ptr %3, align 4
+// CHECK-NEXT : %[[ALLOC_any:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK-NEXT : store i32 %[[LOAD]], ptr %[[ALLOC_any]], align 4
+// CHECK-NEXT : store i32 %[[LOAD]], ptr @global_host, align 4
+// CHECK-NEXT : %[[ALLOC_NOHOST:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK-NEXT : store i32 %[[LOAD]], ptr %[[ALLOC_NOHOST]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index c5cdecd091770..84b5955a5627a 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3623,3 +3623,104 @@ llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP]]
// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 1, ptr [[AFFLIST]]
+
+// -----
+
+module attributes {omp.is_target_device = false} {
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
+llvm.func @omp_groupprivate_host() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.addressof @any : !llvm.ptr
+ %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %0, %2 : i32, !llvm.ptr
+
+ %3 = llvm.mlir.addressof @host : !llvm.ptr
+ %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+ llvm.store %0, %4 : i32, !llvm.ptr
+
+ %5 = llvm.mlir.addressof @nohost : !llvm.ptr
+ %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %0, %6 : i32, !llvm.ptr
+ llvm.return
+}
+}
+
+// CHECK: @any = internal global i32 undef
+// CHECK: @host = internal global i32 undef
+// CHECK: @nohost = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_host
+// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP1]], align 4
+// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP2]], align 4
+// CHECK: store i32 1, ptr @nohost, align 4
+
+// -----
+
+module attributes {omp.is_target_device = true} {
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
+llvm.func @omp_groupprivate_device() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.addressof @any : !llvm.ptr
+ %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %0, %2 : i32, !llvm.ptr
+
+ %3 = llvm.mlir.addressof @host : !llvm.ptr
+ %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+ llvm.store %0, %4 : i32, !llvm.ptr
+
+ %5 = llvm.mlir.addressof @nohost : !llvm.ptr
+ %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %0, %6 : i32, !llvm.ptr
+ llvm.return
+}
+}
+
+// CHECK: @any = internal global i32 undef
+// CHECK: @host = internal global i32 undef
+// CHECK: @nohost = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_device
+// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP1]], align 4
+// CHECK: store i32 1, ptr @host, align 4
+// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP2]], align 4
+
+// -----
+
+module attributes {omp.is_target_device = false} {
+llvm.mlir.global internal @any1() : i32
+llvm.mlir.global internal @host1() : i32
+llvm.mlir.global internal @nohost1() : i32
+llvm.func @omp_groupprivate_host() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.addressof @any1 : !llvm.ptr
+ %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+ llvm.store %0, %2 : i32, !llvm.ptr
+
+ %3 = llvm.mlir.addressof @host1 : !llvm.ptr
+ %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+ llvm.store %0, %4 : i32, !llvm.ptr
+
+ %5 = llvm.mlir.addressof @nohost1 : !llvm.ptr
+ %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ llvm.store %0, %6 : i32, !llvm.ptr
+ llvm.return
+}
+}
+
+// CHECK: @any1 = internal global i32 undef
+// CHECK: @host1 = internal global i32 undef
+// CHECK: @nohost1 = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_host
+// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP1]], align 4
+// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK: store i32 1, ptr [[TMP2]], align 4
+// CHECK: store i32 1, ptr @nohost1, align 4
+
+// -----
>From 1b3f3a413ea06a9585534cfba28e602ec507cef3 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Sun, 9 Nov 2025 09:14:30 +0530
Subject: [PATCH 2/9] Use getGlobalFromSymbol for threadprivate and
groupprivate
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 50 ++++++++++++-------
.../Target/LLVMIR/omptarget-groupprivate.mlir | 2 +-
2 files changed, 34 insertions(+), 18 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f70c86d530980..a0476ed935314 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4515,6 +4515,26 @@ convertOmpCancellationPoint(omp::CancellationPointOp op,
return success();
}
+static LLVM::GlobalOp
+getGlobalFromSymbol(Operation *symOp,
+ LLVM::ModuleTranslation &moduleTranslation,
+ Operation *opInst) {
+
+ // Handle potential address space cast
+ if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
+ symOp = asCast.getOperand().getDefiningOp();
+
+ // Check if we have an AddressOfOp
+ if (!isa<LLVM::AddressOfOp>(symOp)) {
+ if (opInst)
+ opInst->emitError("Addressing symbol not found");
+ return nullptr;
+ }
+
+ LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
+ return addressOfOp.getGlobal(moduleTranslation.symbolTable());
+}
+
/// Converts an OpenMP Threadprivate operation into LLVM IR using
/// OpenMPIRBuilder.
static LogicalResult
@@ -4530,15 +4550,10 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
Value symAddr = threadprivateOp.getSymAddr();
auto *symOp = symAddr.getDefiningOp();
- if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
- symOp = asCast.getOperand().getDefiningOp();
-
- if (!isa<LLVM::AddressOfOp>(symOp))
- return opInst.emitError("Addressing symbol not found");
- LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
-
LLVM::GlobalOp global =
- addressOfOp.getGlobal(moduleTranslation.symbolTable());
+ getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
+ if (!global)
+ return failure();
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
llvm::Type *type = globalValue->getValueType();
llvm::TypeSize typeSize =
@@ -7498,17 +7513,13 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
}
Value symAddr = groupprivateOp.getSymAddr();
- auto *symOp = symAddr.getDefiningOp();
-
- if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
- symOp = asCast.getOperand().getDefiningOp();
-
- if (!isa<LLVM::AddressOfOp>(symOp))
- return opInst.emitError("Addressing symbol not found");
- LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
+ Operation *symOp = symAddr.getDefiningOp();
LLVM::GlobalOp global =
- addressOfOp.getGlobal(moduleTranslation.symbolTable());
+ getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
+ if (!global)
+ return failure();
+
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
llvm::Value *resultPtr;
@@ -7529,6 +7540,11 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
if (!resultPtr)
resultPtr = globalValue;
}
+
+ llvm::Type *ptrTy = builder.getPtrTy();
+ if (resultPtr->getType() != ptrTy)
+ resultPtr = builder.CreateBitCast(resultPtr, ptrTy);
+
moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
return success();
}
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
index 46e9639adcc06..f6b37e6446fe7 100644
--- a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -33,7 +33,7 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
// CHECK-LABEL: omp.target:
-// CHECK-NEXT : %[[LOAD:.*]] = load i32, ptr %3, align 4
+// CHECK-NEXT : %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
// CHECK-NEXT : %[[ALLOC_any:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
// CHECK-NEXT : store i32 %[[LOAD]], ptr %[[ALLOC_any]], align 4
// CHECK-NEXT : store i32 %[[LOAD]], ptr @global_host, align 4
>From 3f79cb05c05581f800e1acc1739b3673d3dd23f6 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 28 Nov 2025 21:28:56 +0530
Subject: [PATCH 3/9] Remove bitcast for resultPtr
---
.../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index a0476ed935314..acee82e7954da 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7541,10 +7541,6 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
resultPtr = globalValue;
}
- llvm::Type *ptrTy = builder.getPtrTy();
- if (resultPtr->getType() != ptrTy)
- resultPtr = builder.CreateBitCast(resultPtr, ptrTy);
-
moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
return success();
}
>From cacc5b3e4d18301291a69b581bb7e7dce2d109a3 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 1 Dec 2025 12:41:50 +0530
Subject: [PATCH 4/9] Use llvm addrspace(3) globals for amdgpu and nvptx
groupprivate lowering
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 40 +++++++----
.../Target/LLVMIR/omptarget-groupprivate.mlir | 12 ++--
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 71 +++++--------------
3 files changed, 53 insertions(+), 70 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index acee82e7954da..efff0865d7b2f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7524,21 +7524,33 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::Value *resultPtr;
if (shouldAllocate) {
- // Get the size of the variable
- llvm::Type *varType = globalValue->getValueType();
- llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
- llvm::DataLayout DL = llvmModule->getDataLayout();
- uint64_t typeSize = DL.getTypeAllocSize(varType);
- // Call omp_alloc_shared to allocate memory for groupprivate variable.
- llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
- *llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
- // Call runtime to allocate shared memory for this group
- resultPtr = builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
- } else {
- // Use original global address when not allocating group-private storage
- resultPtr = moduleTranslation.lookupValue(symAddr);
- if (!resultPtr)
+ if (isTargetDevice) {
+ // Get the size of the variable
+ llvm::Type *varType = globalValue->getValueType();
+ llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+ // Create a llvm global variable in shared memory
+ llvm::Triple targetTriple = llvm::Triple(llvmModule->getTargetTriple());
+ if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
+ // Shared address space is 3 for amdgpu and nvptx targets.
+ unsigned sharedAddressSpace = 3;
+ llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
+ *llvmModule, varType, false, llvm::GlobalValue::InternalLinkage,
+ llvm::PoisonValue::get(varType), globalValue->getName(), nullptr,
+ llvm::GlobalValue::NotThreadLocal, sharedAddressSpace, false);
+ resultPtr = sharedVar;
+ } else {
+ return opInst.emitError()
+ << "Groupprivate operation is not supported for this target: "
+ << targetTriple.str();
+ }
+ } else {
+ // Use original global address when allocating on host device.
+ // TODO: Add support for allocating group-private storage on host device.
resultPtr = globalValue;
+ }
+ } else {
+ // Use original global address when not allocating group-private storage.
+ resultPtr = globalValue;
}
moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
index f6b37e6446fe7..bdbe6d11d9957 100644
--- a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -31,11 +31,15 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
llvm.mlir.global internal @global_nohost() : i32
}
+// CHECK-DAG: @global_a = internal global i32 undef
+// CHECK-DAG: @global_any = internal global i32 undef
+// CHECK-DAG: @global_host = internal global i32 undef
+// CHECK-DAG: @global_nohost = internal global i32 undef
+// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
+// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
// CHECK-LABEL: omp.target:
// CHECK-NEXT : %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
-// CHECK-NEXT : %[[ALLOC_any:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK-NEXT : store i32 %[[LOAD]], ptr %[[ALLOC_any]], align 4
+// CHECK-NEXT : store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
// CHECK-NEXT : store i32 %[[LOAD]], ptr @global_host, align 4
-// CHECK-NEXT : %[[ALLOC_NOHOST:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK-NEXT : store i32 %[[LOAD]], ptr %[[ALLOC_NOHOST]], align 4
+// CHECK-NEXT : store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 84b5955a5627a..2e2d7c9ab9d7e 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3626,40 +3626,7 @@ llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
// -----
-module attributes {omp.is_target_device = false} {
-llvm.mlir.global internal @any() : i32
-llvm.mlir.global internal @host() : i32
-llvm.mlir.global internal @nohost() : i32
-llvm.func @omp_groupprivate_host() {
- %0 = llvm.mlir.constant(1 : i32) : i32
- %1 = llvm.mlir.addressof @any : !llvm.ptr
- %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
- llvm.store %0, %2 : i32, !llvm.ptr
-
- %3 = llvm.mlir.addressof @host : !llvm.ptr
- %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
- llvm.store %0, %4 : i32, !llvm.ptr
-
- %5 = llvm.mlir.addressof @nohost : !llvm.ptr
- %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
- llvm.store %0, %6 : i32, !llvm.ptr
- llvm.return
-}
-}
-
-// CHECK: @any = internal global i32 undef
-// CHECK: @host = internal global i32 undef
-// CHECK: @nohost = internal global i32 undef
-// CHECK-LABEL: @omp_groupprivate_host
-// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: store i32 1, ptr [[TMP1]], align 4
-// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: store i32 1, ptr [[TMP2]], align 4
-// CHECK: store i32 1, ptr @nohost, align 4
-
-// -----
-
-module attributes {omp.is_target_device = true} {
+module attributes {omp.is_target_device = true, llvm.target_triple = "nvptx64-nvidia-cuda"} {
llvm.mlir.global internal @any() : i32
llvm.mlir.global internal @host() : i32
llvm.mlir.global internal @nohost() : i32
@@ -3680,15 +3647,16 @@ llvm.func @omp_groupprivate_device() {
}
}
-// CHECK: @any = internal global i32 undef
-// CHECK: @host = internal global i32 undef
-// CHECK: @nohost = internal global i32 undef
-// CHECK-LABEL: @omp_groupprivate_device
-// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: store i32 1, ptr [[TMP1]], align 4
-// CHECK: store i32 1, ptr @host, align 4
-// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: store i32 1, ptr [[TMP2]], align 4
+// CHECK-DAG: @any = internal global i32 undef
+// CHECK-DAG: @host = internal global i32 undef
+// CHECK-DAG: @nohost = internal global i32 undef
+// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
+// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
+// CHECK-LABEL: define void @omp_groupprivate_device()
+// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
+// CHECK: store i32 1, ptr @host, align 4
+// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
+// CHECK: ret void
// -----
@@ -3713,14 +3681,13 @@ llvm.func @omp_groupprivate_host() {
}
}
-// CHECK: @any1 = internal global i32 undef
-// CHECK: @host1 = internal global i32 undef
-// CHECK: @nohost1 = internal global i32 undef
-// CHECK-LABEL: @omp_groupprivate_host
-// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: store i32 1, ptr [[TMP1]], align 4
-// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK: store i32 1, ptr [[TMP2]], align 4
-// CHECK: store i32 1, ptr @nohost1, align 4
+// CHECK-DAG: @any1 = internal global i32 undef
+// CHECK-DAG: @host1 = internal global i32 undef
+// CHECK-DAG: @nohost1 = internal global i32 undef
+// CHECK-LABEL: define void @omp_groupprivate_host()
+// CHECK: store i32 1, ptr @any1, align 4
+// CHECK: store i32 1, ptr @host1, align 4
+// CHECK: store i32 1, ptr @nohost1, align 4
+// CHECK: ret void
// -----
>From e1b6d9cd61ad8e4c5492fb50380f71bdc17b000d Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 12 Jan 2026 19:55:27 +0530
Subject: [PATCH 5/9] Fix the llvmir translation by getting valid type from
global
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 ++--
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 57 ++++++++++++-------
2 files changed, 43 insertions(+), 25 deletions(-)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index a53134cb49ee2..83b9c940d90d7 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2365,12 +2365,15 @@ def GroupprivateOp : OpenMP_Op<"groupprivate",
each group having its own copy.
This operation takes in the address of a symbol that represents the original
- variable, optional DeviceTypeAttr and returns the address of its groupprivate copy.
- All occurrences of groupprivate variables in a parallel region should
- use the groupprivate copy returned by this operation.
+ variable and returns the address of its groupprivate copy. The symbol must
+ refer to a global variable so that type information can be obtained from it.
The `sym_addr` refers to the address of the symbol, which is a pointer to
- the original variable.
+ the original variable. It must be obtained via `llvm.mlir.addressof` from
+ a global variable.
+
+ The optional `device_type` attribute specifies where the groupprivate
+ storage should be allocated (host, nohost, or any).
}];
let arguments = (ins
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index efff0865d7b2f..b92d3bf29d510 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7493,7 +7493,7 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
bool isTargetDevice = ompBuilder->Config.isTargetDevice();
auto deviceType = groupprivateOp.getDeviceType();
- // skip allocation based on device_type
+ // Skip allocation based on device_type
bool shouldAllocate = true;
if (deviceType.has_value()) {
switch (*deviceType) {
@@ -7513,30 +7513,45 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
}
Value symAddr = groupprivateOp.getSymAddr();
- Operation *symOp = symAddr.getDefiningOp();
+ llvm::Value *symValue = moduleTranslation.lookupValue(symAddr);
+ llvm::Value *resultPtr;
- LLVM::GlobalOp global =
- getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
- if (!global)
- return failure();
+ // Get the element type and variable name from the global.
+ // Groupprivate requires sym_addr to come from a global variable.
+ llvm::Type *varType = nullptr;
+ std::string varName = "omp.groupprivate";
+
+ if (Operation *symOp = symAddr.getDefiningOp()) {
+ if (LLVM::GlobalOp global =
+ getGlobalFromSymbol(symOp, moduleTranslation, nullptr)) {
+ // Get type from the global
+ varType = moduleTranslation.convertType(global.getType());
+ // Get name from the global
+ if (llvm::GlobalValue *globalValue =
+ moduleTranslation.lookupGlobal(global)) {
+ varName = globalValue->getName().str();
+ }
+ }
+ }
- llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
- llvm::Value *resultPtr;
+ if (!varType) {
+ return opInst.emitError()
+ << "Groupprivate requires sym_addr to reference a global variable";
+ }
if (shouldAllocate) {
if (isTargetDevice) {
- // Get the size of the variable
- llvm::Type *varType = globalValue->getValueType();
llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
- // Create a llvm global variable in shared memory
llvm::Triple targetTriple = llvm::Triple(llvmModule->getTargetTriple());
if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
- // Shared address space is 3 for amdgpu and nvptx targets.
+ // Shared address space is 3 for AMDGPU and NVPTX targets.
unsigned sharedAddressSpace = 3;
llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
- *llvmModule, varType, false, llvm::GlobalValue::InternalLinkage,
- llvm::PoisonValue::get(varType), globalValue->getName(), nullptr,
- llvm::GlobalValue::NotThreadLocal, sharedAddressSpace, false);
+ *llvmModule, varType, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
+ varName, /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal, sharedAddressSpace,
+ /*isExternallyInitialized=*/false);
resultPtr = sharedVar;
} else {
return opInst.emitError()
@@ -7544,13 +7559,13 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
<< targetTriple.str();
}
} else {
- // Use original global address when allocating on host device.
+ // Use original address when allocating on host device.
// TODO: Add support for allocating group-private storage on host device.
- resultPtr = globalValue;
+ resultPtr = symValue;
}
} else {
- // Use original global address when not allocating group-private storage.
- resultPtr = globalValue;
+ // Use original address when not allocating group-private storage.
+ resultPtr = symValue;
}
moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
@@ -7565,8 +7580,8 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
if (ompBuilder->Config.isTargetDevice() &&
- !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp>(
- op) &&
+ !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp,
+ omp::GroupprivateOp>(op) &&
isHostDeviceOp(op))
return op->emitOpError() << "unsupported host op found in device";
>From c45176737d5ec6926fc6393c5e008d0555db191a Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 8 Apr 2026 15:03:17 +0530
Subject: [PATCH 6/9] Change omp.groupprivate to take a symbol reference
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 18 +--
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 17 +++
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 142 +++++++-----------
mlir/test/Dialect/OpenMP/ops.mlir | 29 ++--
.../Target/LLVMIR/omptarget-groupprivate.mlir | 9 +-
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 21 +--
6 files changed, 97 insertions(+), 139 deletions(-)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 83b9c940d90d7..913c3ff3bbecb 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2357,33 +2357,29 @@ def IteratorOp : OpenMP_Op<"iterator",
// [6.0] groupprivate Directive
//===----------------------------------------------------------------------===//
-def GroupprivateOp : OpenMP_Op<"groupprivate",
- [AllTypesMatch<["sym_addr", "gp_addr"]>]> {
+def GroupprivateOp : OpenMP_Op<"groupprivate", [Pure]> {
let summary = "groupprivate directive";
let description = [{
The groupprivate directive specifies that variables are replicated, with
each group having its own copy.
- This operation takes in the address of a symbol that represents the original
- variable and returns the address of its groupprivate copy. The symbol must
- refer to a global variable so that type information can be obtained from it.
-
- The `sym_addr` refers to the address of the symbol, which is a pointer to
- the original variable. It must be obtained via `llvm.mlir.addressof` from
- a global variable.
+ This operation takes a symbol reference to a global variable and returns
+ the address of its groupprivate copy. The referenced symbol must exist and
+ must not be a function.
The optional `device_type` attribute specifies where the groupprivate
storage should be allocated (host, nohost, or any).
}];
let arguments = (ins
- OpenMP_PointerLikeType:$sym_addr,
+ FlatSymbolRefAttr:$sym_name,
OptionalAttr<DeclareTargetDeviceTypeAttr>:$device_type
);
let results = (outs OpenMP_PointerLikeType:$gp_addr);
let assemblyFormat = [{
- $sym_addr `:` type($sym_addr) ( `,` `device_type` $device_type^ )? `->` type($gp_addr) attr-dict
+ $sym_name `:` type($gp_addr) (`,` `device_type` $device_type^)? attr-dict
}];
+ let hasVerifier = 1;
}
#endif // OPENMP_OPS
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 7cab929d583ca..89449e045b09a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -4842,6 +4842,23 @@ LogicalResult IteratorOp::verify() {
return success();
}
+//===----------------------------------------------------------------------===//
+// GroupprivateOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult GroupprivateOp::verify() {
+ auto *symbol = SymbolTable::lookupNearestSymbolFrom(*this, getSymNameAttr());
+ if (!symbol)
+ return emitOpError() << "expected symbol reference '" << getSymName()
+ << "' to point to a global variable";
+
+ if (isa<FunctionOpInterface>(symbol))
+ return emitOpError() << "expected symbol reference '" << getSymName()
+ << "' to point to a global variable, not a function";
+
+ return success();
+}
+
#define GET_ATTRDEF_CLASSES
#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index b92d3bf29d510..e10573171cb49 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4515,26 +4515,6 @@ convertOmpCancellationPoint(omp::CancellationPointOp op,
return success();
}
-static LLVM::GlobalOp
-getGlobalFromSymbol(Operation *symOp,
- LLVM::ModuleTranslation &moduleTranslation,
- Operation *opInst) {
-
- // Handle potential address space cast
- if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
- symOp = asCast.getOperand().getDefiningOp();
-
- // Check if we have an AddressOfOp
- if (!isa<LLVM::AddressOfOp>(symOp)) {
- if (opInst)
- opInst->emitError("Addressing symbol not found");
- return nullptr;
- }
-
- LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
- return addressOfOp.getGlobal(moduleTranslation.symbolTable());
-}
-
/// Converts an OpenMP Threadprivate operation into LLVM IR using
/// OpenMPIRBuilder.
static LogicalResult
@@ -4550,8 +4530,15 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
Value symAddr = threadprivateOp.getSymAddr();
auto *symOp = symAddr.getDefiningOp();
+ if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
+ symOp = asCast.getOperand().getDefiningOp();
+
+ if (!isa<LLVM::AddressOfOp>(symOp))
+ return opInst.emitError("Addressing symbol not found");
+
+ LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
LLVM::GlobalOp global =
- getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
+ addressOfOp.getGlobal(moduleTranslation.symbolTable());
if (!global)
return failure();
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
@@ -7480,7 +7467,7 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
-/// Converts an OpenMP Groupprivate operation into LLVM IR.
+/// Converts an OpenMP groupprivate operation into LLVM IR.
static LogicalResult
convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -7491,81 +7478,56 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
return failure();
bool isTargetDevice = ompBuilder->Config.isTargetDevice();
- auto deviceType = groupprivateOp.getDeviceType();
- // Skip allocation based on device_type
+ // Determine whether group-private storage should be allocated based on
+ // device_type. When not specified, default to 'any' (allocate on both).
bool shouldAllocate = true;
- if (deviceType.has_value()) {
- switch (*deviceType) {
- case mlir::omp::DeclareTargetDeviceType::host:
- // Only allocate on host
- shouldAllocate = !isTargetDevice;
- break;
- case mlir::omp::DeclareTargetDeviceType::nohost:
- // Only allocate on device
- shouldAllocate = isTargetDevice;
- break;
- case mlir::omp::DeclareTargetDeviceType::any:
- // Allocate on both
- shouldAllocate = true;
- break;
- }
+ switch (groupprivateOp.getDeviceType().value_or(
+ mlir::omp::DeclareTargetDeviceType::any)) {
+ case mlir::omp::DeclareTargetDeviceType::host:
+ shouldAllocate = !isTargetDevice;
+ break;
+ case mlir::omp::DeclareTargetDeviceType::nohost:
+ shouldAllocate = isTargetDevice;
+ break;
+ case mlir::omp::DeclareTargetDeviceType::any:
+ shouldAllocate = true;
+ break;
}
- Value symAddr = groupprivateOp.getSymAddr();
- llvm::Value *symValue = moduleTranslation.lookupValue(symAddr);
- llvm::Value *resultPtr;
+ // Look up the global variable directly by symbol name.
+ LLVM::GlobalOp global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
+ &opInst, groupprivateOp.getSymNameAttr());
+ if (!global)
+ return opInst.emitError()
+ << "expected symbol '" << groupprivateOp.getSymName()
+ << "' to reference an LLVM global variable";
- // Get the element type and variable name from the global.
- // Groupprivate requires sym_addr to come from a global variable.
- llvm::Type *varType = nullptr;
- std::string varName = "omp.groupprivate";
-
- if (Operation *symOp = symAddr.getDefiningOp()) {
- if (LLVM::GlobalOp global =
- getGlobalFromSymbol(symOp, moduleTranslation, nullptr)) {
- // Get type from the global
- varType = moduleTranslation.convertType(global.getType());
- // Get name from the global
- if (llvm::GlobalValue *globalValue =
- moduleTranslation.lookupGlobal(global)) {
- varName = globalValue->getName().str();
- }
- }
- }
+ llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
+ llvm::Type *varType = moduleTranslation.convertType(global.getType());
+ std::string varName = globalValue->getName().str();
- if (!varType) {
- return opInst.emitError()
- << "Groupprivate requires sym_addr to reference a global variable";
- }
-
- if (shouldAllocate) {
- if (isTargetDevice) {
- llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
- llvm::Triple targetTriple = llvm::Triple(llvmModule->getTargetTriple());
- if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
- // Shared address space is 3 for AMDGPU and NVPTX targets.
- unsigned sharedAddressSpace = 3;
- llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
- *llvmModule, varType, /*isConstant=*/false,
- llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
- varName, /*InsertBefore=*/nullptr,
- llvm::GlobalValue::NotThreadLocal, sharedAddressSpace,
- /*isExternallyInitialized=*/false);
- resultPtr = sharedVar;
- } else {
- return opInst.emitError()
- << "Groupprivate operation is not supported for this target: "
- << targetTriple.str();
- }
+ llvm::Value *resultPtr;
+ if (shouldAllocate && isTargetDevice) {
+ llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+ llvm::Triple targetTriple(llvmModule->getTargetTriple());
+ if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
+ unsigned sharedAddressSpace = 3;
+ llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
+ *llvmModule, varType, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
+ varName, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+ sharedAddressSpace,
+ /*isExternallyInitialized=*/false);
+ resultPtr = sharedVar;
} else {
- // Use original address when allocating on host device.
- // TODO: Add support for allocating group-private storage on host device.
- resultPtr = symValue;
+ return opInst.emitError() << "groupprivate is not supported for target: "
+ << targetTriple.str();
}
} else {
- // Use original address when not allocating group-private storage.
- resultPtr = symValue;
+ // Use original global address on host or when not allocating
+ // group-private storage.
+ resultPtr = globalValue;
}
moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
@@ -7580,8 +7542,8 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
if (ompBuilder->Config.isTargetDevice() &&
- !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp,
- omp::GroupprivateOp>(op) &&
+ !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp>(
+ op) &&
isHostDeviceOp(op))
return op->emitOpError() << "unsupported host op found in device";
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 87962e4228c24..1340919357d60 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3675,30 +3675,21 @@ func.func @omp_task_affinity_iterator_2d(%lb0 : index, %ub0 : index, %st0 : inde
// CHECK-LABEL: func.func @omp_groupprivate_device_type
func.func @omp_groupprivate_device_type() {
- %0 = arith.constant 1 : i32
%1 = arith.constant 2 : i32
- // CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @gp : !llvm.ptr
- %gp_addr = llvm.mlir.addressof @gp : !llvm.ptr
- // CHECK: [[ARG1:%.*]] = llvm.mlir.addressof @any : !llvm.ptr
- %any_addr = llvm.mlir.addressof @any : !llvm.ptr
- // CHECK: [[ARG2:%.*]] = llvm.mlir.addressof @host : !llvm.ptr
- %host_addr = llvm.mlir.addressof @host : !llvm.ptr
- // CHECK: [[ARG3:%.*]] = llvm.mlir.addressof @nohost : !llvm.ptr
- %nohost_addr = llvm.mlir.addressof @nohost : !llvm.ptr
-
- // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
- %group_private_addr = omp.groupprivate %gp_addr : !llvm.ptr -> !llvm.ptr
-
- // CHECK: {{.*}} = omp.groupprivate [[ARG1]] : !llvm.ptr, device_type (any) -> !llvm.ptr
- %group_private_any = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate @gp : !llvm.ptr
+ %group_private_addr = omp.groupprivate @gp : !llvm.ptr
+
+ // CHECK: {{.*}} = omp.groupprivate @any : !llvm.ptr, device_type (any)
+ %group_private_any = omp.groupprivate @any : !llvm.ptr, device_type(any)
llvm.store %1, %group_private_any : i32, !llvm.ptr
- // CHECK: {{.*}} = omp.groupprivate [[ARG2]] : !llvm.ptr, device_type (host) -> !llvm.ptr
- %group_private_host = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+ // CHECK: {{.*}} = omp.groupprivate @host : !llvm.ptr, device_type (host)
+ %group_private_host = omp.groupprivate @host : !llvm.ptr, device_type(host)
llvm.store %1, %group_private_host : i32, !llvm.ptr
- // CHECK: {{.*}} = omp.groupprivate [[ARG3]] : !llvm.ptr, device_type (nohost) -> !llvm.ptr
- %group_private_nohost = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ // CHECK: {{.*}} = omp.groupprivate @nohost : !llvm.ptr, device_type (nohost)
+ %group_private_nohost = omp.groupprivate @nohost : !llvm.ptr, device_type(nohost)
llvm.store %1, %group_private_nohost : i32, !llvm.ptr
return
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
index bdbe6d11d9957..132e7b8d14242 100644
--- a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -9,16 +9,13 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
%loaded = llvm.load %arg1 : !llvm.ptr -> i32
- %any_addr = llvm.mlir.addressof @global_any : !llvm.ptr
- %any_gp = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+ %any_gp = omp.groupprivate @global_any : !llvm.ptr, device_type(any)
llvm.store %loaded, %any_gp : i32, !llvm.ptr
- %host_addr = llvm.mlir.addressof @global_host : !llvm.ptr
- %host_gp = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+ %host_gp = omp.groupprivate @global_host : !llvm.ptr, device_type(host)
llvm.store %loaded, %host_gp : i32, !llvm.ptr
- %nohost_addr = llvm.mlir.addressof @global_nohost : !llvm.ptr
- %nohost_gp = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ %nohost_gp = omp.groupprivate @global_nohost : !llvm.ptr, device_type(nohost)
llvm.store %loaded, %nohost_gp : i32, !llvm.ptr
omp.terminator
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 2e2d7c9ab9d7e..9401ba933ede5 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3630,18 +3630,16 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "nvptx64-nv
llvm.mlir.global internal @any() : i32
llvm.mlir.global internal @host() : i32
llvm.mlir.global internal @nohost() : i32
-llvm.func @omp_groupprivate_device() {
+llvm.func @omp_groupprivate_device() attributes {
+ omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
%0 = llvm.mlir.constant(1 : i32) : i32
- %1 = llvm.mlir.addressof @any : !llvm.ptr
- %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+ %2 = omp.groupprivate @any : !llvm.ptr, device_type(any)
llvm.store %0, %2 : i32, !llvm.ptr
- %3 = llvm.mlir.addressof @host : !llvm.ptr
- %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+ %4 = omp.groupprivate @host : !llvm.ptr, device_type(host)
llvm.store %0, %4 : i32, !llvm.ptr
- %5 = llvm.mlir.addressof @nohost : !llvm.ptr
- %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ %6 = omp.groupprivate @nohost : !llvm.ptr, device_type(nohost)
llvm.store %0, %6 : i32, !llvm.ptr
llvm.return
}
@@ -3666,16 +3664,13 @@ llvm.mlir.global internal @host1() : i32
llvm.mlir.global internal @nohost1() : i32
llvm.func @omp_groupprivate_host() {
%0 = llvm.mlir.constant(1 : i32) : i32
- %1 = llvm.mlir.addressof @any1 : !llvm.ptr
- %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+ %2 = omp.groupprivate @any1 : !llvm.ptr, device_type(any)
llvm.store %0, %2 : i32, !llvm.ptr
- %3 = llvm.mlir.addressof @host1 : !llvm.ptr
- %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+ %4 = omp.groupprivate @host1 : !llvm.ptr, device_type(host)
llvm.store %0, %4 : i32, !llvm.ptr
- %5 = llvm.mlir.addressof @nohost1 : !llvm.ptr
- %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+ %6 = omp.groupprivate @nohost1 : !llvm.ptr, device_type(nohost)
llvm.store %0, %6 : i32, !llvm.ptr
llvm.return
}
>From 422237b80588435354be7df9c89f8eac4a9fdeed Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 8 Apr 2026 15:06:33 +0530
Subject: [PATCH 7/9] Remove threadprivate changes
---
.../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index e10573171cb49..7e681e60923bc 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4535,12 +4535,9 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
if (!isa<LLVM::AddressOfOp>(symOp))
return opInst.emitError("Addressing symbol not found");
-
LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
LLVM::GlobalOp global =
addressOfOp.getGlobal(moduleTranslation.symbolTable());
- if (!global)
- return failure();
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
llvm::Type *type = globalValue->getValueType();
llvm::TypeSize typeSize =
>From e022b7bd63685dbbcb61f1a119fd2e78ff798b37 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 8 Apr 2026 15:08:19 +0530
Subject: [PATCH 8/9] fix threadprivate
---
.../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7e681e60923bc..51023ce59d92e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4535,7 +4535,8 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
if (!isa<LLVM::AddressOfOp>(symOp))
return opInst.emitError("Addressing symbol not found");
- LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
+ LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
+
LLVM::GlobalOp global =
addressOfOp.getGlobal(moduleTranslation.symbolTable());
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
>From 29cb0463827d47848917990aa272707d490ddfc2 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Tue, 14 Apr 2026 16:22:54 +0530
Subject: [PATCH 9/9] update
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 6 ++---
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 ++--
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 26 +++++++++++--------
mlir/test/Dialect/OpenMP/ops.mlir | 12 ++++-----
.../Target/LLVMIR/omptarget-groupprivate.mlir | 19 +++++++-------
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 22 ++++++++--------
6 files changed, 47 insertions(+), 43 deletions(-)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 913c3ff3bbecb..f3254aa50e689 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2357,7 +2357,8 @@ def IteratorOp : OpenMP_Op<"iterator",
// [6.0] groupprivate Directive
//===----------------------------------------------------------------------===//
-def GroupprivateOp : OpenMP_Op<"groupprivate", [Pure]> {
+def GroupprivateOp : OpenMP_Op<"groupprivate",
+ [Pure, DeclareOpInterfaceMethods<SymbolUserOpInterface>]> {
let summary = "groupprivate directive";
let description = [{
The groupprivate directive specifies that variables are replicated, with
@@ -2377,9 +2378,8 @@ def GroupprivateOp : OpenMP_Op<"groupprivate", [Pure]> {
);
let results = (outs OpenMP_PointerLikeType:$gp_addr);
let assemblyFormat = [{
- $sym_name `:` type($gp_addr) (`,` `device_type` $device_type^)? attr-dict
+ $sym_name (`device_type` $device_type^)? `:` type($gp_addr) attr-dict
}];
- let hasVerifier = 1;
}
#endif // OPENMP_OPS
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 89449e045b09a..dfc5d92af2ecd 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -4846,8 +4846,9 @@ LogicalResult IteratorOp::verify() {
// GroupprivateOp
//===----------------------------------------------------------------------===//
-LogicalResult GroupprivateOp::verify() {
- auto *symbol = SymbolTable::lookupNearestSymbolFrom(*this, getSymNameAttr());
+LogicalResult
+GroupprivateOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
+ auto *symbol = symbolTable.lookupNearestSymbolFrom(*this, getSymNameAttr());
if (!symbol)
return emitOpError() << "expected symbol reference '" << getSymName()
<< "' to point to a global variable";
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 51023ce59d92e..9ec464925ba70 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -32,7 +32,9 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -7509,19 +7511,21 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
if (shouldAllocate && isTargetDevice) {
llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
llvm::Triple targetTriple(llvmModule->getTargetTriple());
- if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
- unsigned sharedAddressSpace = 3;
- llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
- *llvmModule, varType, /*isConstant=*/false,
- llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
- varName, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
- sharedAddressSpace,
- /*isExternallyInitialized=*/false);
- resultPtr = sharedVar;
- } else {
+ unsigned sharedAddressSpace;
+ if (targetTriple.isAMDGCN())
+ sharedAddressSpace = llvm::AMDGPUAS::LOCAL_ADDRESS;
+ else if (targetTriple.isNVPTX())
+ sharedAddressSpace = llvm::NVPTXAS::ADDRESS_SPACE_SHARED;
+ else
return opInst.emitError() << "groupprivate is not supported for target: "
<< targetTriple.str();
- }
+ llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
+ *llvmModule, varType, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
+ varName, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+ sharedAddressSpace,
+ /*isExternallyInitialized=*/false);
+ resultPtr = sharedVar;
} else {
// Use original global address on host or when not allocating
// group-private storage.
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 1340919357d60..05367598454a6 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3680,16 +3680,16 @@ func.func @omp_groupprivate_device_type() {
// CHECK: {{.*}} = omp.groupprivate @gp : !llvm.ptr
%group_private_addr = omp.groupprivate @gp : !llvm.ptr
- // CHECK: {{.*}} = omp.groupprivate @any : !llvm.ptr, device_type (any)
- %group_private_any = omp.groupprivate @any : !llvm.ptr, device_type(any)
+ // CHECK: {{.*}} = omp.groupprivate @any device_type (any) : !llvm.ptr
+ %group_private_any = omp.groupprivate @any device_type(any) : !llvm.ptr
llvm.store %1, %group_private_any : i32, !llvm.ptr
- // CHECK: {{.*}} = omp.groupprivate @host : !llvm.ptr, device_type (host)
- %group_private_host = omp.groupprivate @host : !llvm.ptr, device_type(host)
+ // CHECK: {{.*}} = omp.groupprivate @host device_type (host) : !llvm.ptr
+ %group_private_host = omp.groupprivate @host device_type(host) : !llvm.ptr
llvm.store %1, %group_private_host : i32, !llvm.ptr
- // CHECK: {{.*}} = omp.groupprivate @nohost : !llvm.ptr, device_type (nohost)
- %group_private_nohost = omp.groupprivate @nohost : !llvm.ptr, device_type(nohost)
+ // CHECK: {{.*}} = omp.groupprivate @nohost device_type (nohost) : !llvm.ptr
+ %group_private_nohost = omp.groupprivate @nohost device_type(nohost) : !llvm.ptr
llvm.store %1, %group_private_nohost : i32, !llvm.ptr
return
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
index 132e7b8d14242..f064996427d96 100644
--- a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -9,13 +9,13 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
%loaded = llvm.load %arg1 : !llvm.ptr -> i32
- %any_gp = omp.groupprivate @global_any : !llvm.ptr, device_type(any)
+ %any_gp = omp.groupprivate @global_any device_type(any) : !llvm.ptr
llvm.store %loaded, %any_gp : i32, !llvm.ptr
- %host_gp = omp.groupprivate @global_host : !llvm.ptr, device_type(host)
+ %host_gp = omp.groupprivate @global_host device_type(host) : !llvm.ptr
llvm.store %loaded, %host_gp : i32, !llvm.ptr
- %nohost_gp = omp.groupprivate @global_nohost : !llvm.ptr, device_type(nohost)
+ %nohost_gp = omp.groupprivate @global_nohost device_type(nohost) : !llvm.ptr
llvm.store %loaded, %nohost_gp : i32, !llvm.ptr
omp.terminator
@@ -32,11 +32,10 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
// CHECK-DAG: @global_any = internal global i32 undef
// CHECK-DAG: @global_host = internal global i32 undef
// CHECK-DAG: @global_nohost = internal global i32 undef
-// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
-// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
+// CHECK-DAG: @[[SHARED_ANY:global_any.*]] = internal addrspace(3) global i32 poison
+// CHECK-DAG: @[[SHARED_NOHOST:global_nohost.*]] = internal addrspace(3) global i32 poison
// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
-// CHECK-LABEL: omp.target:
-// CHECK-NEXT : %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
-// CHECK-NEXT : store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
-// CHECK-NEXT : store i32 %[[LOAD]], ptr @global_host, align 4
-// CHECK-NEXT : store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
+// CHECK: %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
+// CHECK-NEXT : store i32 %[[LOAD]], ptr addrspace(3) @[[SHARED_ANY]], align 4
+// CHECK-NEXT : store i32 %[[LOAD]], ptr @global_host, align 4
+// CHECK-NEXT : store i32 %[[LOAD]], ptr addrspace(3) @[[SHARED_NOHOST]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 9401ba933ede5..04d76ef07f0a6 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3633,13 +3633,13 @@ llvm.mlir.global internal @nohost() : i32
llvm.func @omp_groupprivate_device() attributes {
omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
%0 = llvm.mlir.constant(1 : i32) : i32
- %2 = omp.groupprivate @any : !llvm.ptr, device_type(any)
+ %2 = omp.groupprivate @any device_type(any) : !llvm.ptr
llvm.store %0, %2 : i32, !llvm.ptr
- %4 = omp.groupprivate @host : !llvm.ptr, device_type(host)
+ %4 = omp.groupprivate @host device_type(host) : !llvm.ptr
llvm.store %0, %4 : i32, !llvm.ptr
- %6 = omp.groupprivate @nohost : !llvm.ptr, device_type(nohost)
+ %6 = omp.groupprivate @nohost device_type(nohost) : !llvm.ptr
llvm.store %0, %6 : i32, !llvm.ptr
llvm.return
}
@@ -3648,12 +3648,12 @@ llvm.func @omp_groupprivate_device() attributes {
// CHECK-DAG: @any = internal global i32 undef
// CHECK-DAG: @host = internal global i32 undef
// CHECK-DAG: @nohost = internal global i32 undef
-// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
-// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
-// CHECK-LABEL: define void @omp_groupprivate_device()
-// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
+// CHECK-DAG: @[[SHARED_ANY:any.*]] = internal addrspace(3) global i32 poison
+// CHECK-DAG: @[[SHARED_NOHOST:nohost.*]] = internal addrspace(3) global i32 poison
+// CHECK: define void @omp_groupprivate_device()
+// CHECK: store i32 1, ptr addrspace(3) @[[SHARED_ANY]], align 4
// CHECK: store i32 1, ptr @host, align 4
-// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
+// CHECK: store i32 1, ptr addrspace(3) @[[SHARED_NOHOST]], align 4
// CHECK: ret void
// -----
@@ -3664,13 +3664,13 @@ llvm.mlir.global internal @host1() : i32
llvm.mlir.global internal @nohost1() : i32
llvm.func @omp_groupprivate_host() {
%0 = llvm.mlir.constant(1 : i32) : i32
- %2 = omp.groupprivate @any1 : !llvm.ptr, device_type(any)
+ %2 = omp.groupprivate @any1 device_type(any) : !llvm.ptr
llvm.store %0, %2 : i32, !llvm.ptr
- %4 = omp.groupprivate @host1 : !llvm.ptr, device_type(host)
+ %4 = omp.groupprivate @host1 device_type(host) : !llvm.ptr
llvm.store %0, %4 : i32, !llvm.ptr
- %6 = omp.groupprivate @nohost1 : !llvm.ptr, device_type(nohost)
+ %6 = omp.groupprivate @nohost1 device_type(nohost) : !llvm.ptr
llvm.store %0, %6 : i32, !llvm.ptr
llvm.return
}
More information about the Mlir-commits
mailing list