[Mlir-commits] [mlir] [OpenMP][mlir] Add Groupprivate op in omp dialect. (PR #162704)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Wed Apr 15 21:53:30 PDT 2026


https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/162704

>From 195cddbf505bb5b5daab4dd206f111938f8769a3 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 26 Sep 2025 10:06:26 +0530
Subject: [PATCH 01/10] [OpenMP][mlir] Add Groupprivate op in omp dialect.

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  30 ++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  71 ++++++++++++
 mlir/test/Dialect/OpenMP/ops.mlir             |  36 +++++++
 .../Target/LLVMIR/omptarget-groupprivate.mlir |  41 +++++++
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 101 ++++++++++++++++++
 5 files changed, 279 insertions(+)
 create mode 100644 mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 1931c91080644..5bc9cbee2561a 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2441,4 +2441,34 @@ def IteratorOp : OpenMP_Op<"iterator",
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// [6.0] groupprivate Directive
+//===----------------------------------------------------------------------===//
+
+def GroupprivateOp : OpenMP_Op<"groupprivate",
+                      [AllTypesMatch<["sym_addr", "gp_addr"]>]> {
+  let summary = "groupprivate directive";
+  let description = [{
+    The groupprivate directive specifies that variables are replicated, with
+    each group having its own copy.
+
+    This operation takes in the address of a symbol that represents the original
+    variable, optional DeviceTypeAttr and returns the address of its groupprivate copy.
+    All occurrences of groupprivate variables in a parallel region should
+    use the groupprivate copy returned by this operation.
+
+    The `sym_addr` refers to the address of the symbol, which is a pointer to
+    the original variable.
+  }];
+
+  let arguments = (ins
+    OpenMP_PointerLikeType:$sym_addr,
+    OptionalAttr<DeclareTargetDeviceTypeAttr>:$device_type
+  );
+  let results = (outs OpenMP_PointerLikeType:$gp_addr);
+  let assemblyFormat = [{
+    $sym_addr `:` type($sym_addr) ( `,` `device_type` $device_type^ )? `->` type($gp_addr) attr-dict
+  }];
+}
+
 #endif // OPENMP_OPS
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 227e6d205ace6..6cc1658152d18 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7853,6 +7853,74 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Converts an OpenMP Groupprivate operation into LLVM IR.
+static LogicalResult
+convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
+                       LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  auto groupprivateOp = cast<omp::GroupprivateOp>(opInst);
+
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  bool isTargetDevice = ompBuilder->Config.isTargetDevice();
+  auto deviceType = groupprivateOp.getDeviceType();
+
+  // skip allocation based on device_type
+  bool shouldAllocate = true;
+  if (deviceType.has_value()) {
+    switch (*deviceType) {
+    case mlir::omp::DeclareTargetDeviceType::host:
+      // Only allocate on host
+      shouldAllocate = !isTargetDevice;
+      break;
+    case mlir::omp::DeclareTargetDeviceType::nohost:
+      // Only allocate on device
+      shouldAllocate = isTargetDevice;
+      break;
+    case mlir::omp::DeclareTargetDeviceType::any:
+      // Allocate on both
+      shouldAllocate = true;
+      break;
+    }
+  }
+
+  Value symAddr = groupprivateOp.getSymAddr();
+  auto *symOp = symAddr.getDefiningOp();
+
+  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
+    symOp = asCast.getOperand().getDefiningOp();
+
+  if (!isa<LLVM::AddressOfOp>(symOp))
+    return opInst.emitError("Addressing symbol not found");
+  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
+
+  LLVM::GlobalOp global =
+      addressOfOp.getGlobal(moduleTranslation.symbolTable());
+  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
+  llvm::Value *resultPtr;
+
+  if (shouldAllocate) {
+    // Get the size of the variable
+    llvm::Type *varType = globalValue->getValueType();
+    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+    llvm::DataLayout DL = llvmModule->getDataLayout();
+    uint64_t typeSize = DL.getTypeAllocSize(varType);
+    // Call omp_alloc_shared to allocate memory for groupprivate variable.
+    llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
+        *llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
+    // Call runtime to allocate shared memory for this group
+    resultPtr = builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
+  } else {
+    // Use original global address when not allocating group-private storage
+    resultPtr = moduleTranslation.lookupValue(symAddr);
+    if (!resultPtr)
+      resultPtr = globalValue;
+  }
+  moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
+  return success();
+}
+
 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
 /// OpenMP runtime calls).
 LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
@@ -8070,6 +8138,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
             return convertAllocateFreeOp(*op, builder, moduleTranslation,
                                          *this);
           })
+          .Case([&](omp::GroupprivateOp) {
+            return convertOmpGroupprivate(*op, builder, moduleTranslation);
+          })
           .Default([&](Operation *inst) {
             return inst->emitError()
                    << "not yet implemented: " << inst->getName();
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 7b3d2c9a0732e..a4d5e1b77447a 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3905,3 +3905,39 @@ func.func @omp_task_affinity_iterator_2d(%lb0 : index, %ub0 : index, %st0 : inde
 
   return
 }
+
+// CHECK-LABEL: func.func @omp_groupprivate_device_type
+func.func @omp_groupprivate_device_type() {
+  %0 = arith.constant 1 : i32
+  %1 = arith.constant 2 : i32
+  // CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @gp : !llvm.ptr
+  %gp_addr = llvm.mlir.addressof @gp : !llvm.ptr
+  // CHECK: [[ARG1:%.*]] = llvm.mlir.addressof @any : !llvm.ptr
+  %any_addr = llvm.mlir.addressof @any : !llvm.ptr
+  // CHECK: [[ARG2:%.*]] = llvm.mlir.addressof @host : !llvm.ptr
+  %host_addr = llvm.mlir.addressof @host : !llvm.ptr
+  // CHECK: [[ARG3:%.*]] = llvm.mlir.addressof @nohost : !llvm.ptr
+  %nohost_addr = llvm.mlir.addressof @nohost : !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
+  %group_private_addr = omp.groupprivate %gp_addr : !llvm.ptr -> !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate [[ARG1]] : !llvm.ptr, device_type (any) -> !llvm.ptr
+  %group_private_any = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+  llvm.store %1, %group_private_any : i32, !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate [[ARG2]] : !llvm.ptr, device_type (host) -> !llvm.ptr
+  %group_private_host = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+  llvm.store %1, %group_private_host : i32, !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate [[ARG3]] : !llvm.ptr, device_type (nohost) -> !llvm.ptr
+  %group_private_nohost = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  llvm.store %1, %group_private_nohost : i32, !llvm.ptr
+
+  return
+}
+
+llvm.mlir.global internal @gp() : i32
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
new file mode 100644
index 0000000000000..46e9639adcc06
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -0,0 +1,41 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa",
+                    dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>} {
+  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
+
+    %ga = llvm.mlir.addressof @global_a : !llvm.ptr
+    %map_a = omp.map.info var_ptr(%ga : !llvm.ptr, i32) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr {name = "i"}
+    omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
+      %loaded = llvm.load %arg1 : !llvm.ptr -> i32
+
+      %any_addr = llvm.mlir.addressof @global_any : !llvm.ptr
+      %any_gp = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+      llvm.store %loaded, %any_gp : i32, !llvm.ptr
+
+      %host_addr = llvm.mlir.addressof @global_host : !llvm.ptr
+      %host_gp = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+      llvm.store %loaded, %host_gp : i32, !llvm.ptr
+
+      %nohost_addr = llvm.mlir.addressof @global_nohost : !llvm.ptr
+      %nohost_gp = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+      llvm.store %loaded, %nohost_gp : i32, !llvm.ptr
+
+      omp.terminator
+    }
+    llvm.return
+  }
+  llvm.mlir.global internal @global_a() : i32
+  llvm.mlir.global internal @global_any() : i32
+  llvm.mlir.global internal @global_host() : i32
+  llvm.mlir.global internal @global_nohost() : i32
+}
+
+// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
+// CHECK-LABEL:  omp.target:
+// CHECK-NEXT :    %[[LOAD:.*]] = load i32, ptr %3, align 4
+// CHECK-NEXT :    %[[ALLOC_any:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK-NEXT :    store i32 %[[LOAD]], ptr %[[ALLOC_any]], align 4
+// CHECK-NEXT :    store i32 %[[LOAD]], ptr @global_host, align 4
+// CHECK-NEXT :    %[[ALLOC_NOHOST:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK-NEXT :    store i32 %[[LOAD]], ptr %[[ALLOC_NOHOST]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index dd663bc2ae652..fb27d710da11f 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3623,3 +3623,104 @@ llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
 // CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
 // CHECK: store i32 0, ptr [[FLAGGEP]]
 // CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 1, ptr [[AFFLIST]]
+
+// -----
+
+module attributes {omp.is_target_device = false} {
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
+llvm.func @omp_groupprivate_host() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.addressof @any : !llvm.ptr
+  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+  llvm.store %0, %2 : i32, !llvm.ptr
+
+  %3 = llvm.mlir.addressof @host : !llvm.ptr
+  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+  llvm.store %0, %4 : i32, !llvm.ptr
+
+  %5 = llvm.mlir.addressof @nohost : !llvm.ptr
+  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  llvm.store %0, %6 : i32, !llvm.ptr
+  llvm.return
+}
+}
+
+// CHECK: @any = internal global i32 undef
+// CHECK: @host = internal global i32 undef
+// CHECK: @nohost = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_host
+// CHECK:  [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK:  store i32 1, ptr [[TMP1]], align 4
+// CHECK:  [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK:  store i32 1, ptr [[TMP2]], align 4
+// CHECK:  store i32 1, ptr @nohost, align 4
+
+// -----
+
+module attributes {omp.is_target_device = true} {
+llvm.mlir.global internal @any() : i32
+llvm.mlir.global internal @host() : i32
+llvm.mlir.global internal @nohost() : i32
+llvm.func @omp_groupprivate_device() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.addressof @any : !llvm.ptr
+  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+  llvm.store %0, %2 : i32, !llvm.ptr
+
+  %3 = llvm.mlir.addressof @host : !llvm.ptr
+  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+  llvm.store %0, %4 : i32, !llvm.ptr
+
+  %5 = llvm.mlir.addressof @nohost : !llvm.ptr
+  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  llvm.store %0, %6 : i32, !llvm.ptr
+  llvm.return
+}
+}
+
+// CHECK: @any = internal global i32 undef
+// CHECK: @host = internal global i32 undef
+// CHECK: @nohost = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_device
+// CHECK:  [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK:  store i32 1, ptr [[TMP1]], align 4
+// CHECK:  store i32 1, ptr @host, align 4
+// CHECK:  [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK:  store i32 1, ptr [[TMP2]], align 4
+
+// -----
+
+module attributes {omp.is_target_device = false} {
+llvm.mlir.global internal @any1() : i32
+llvm.mlir.global internal @host1() : i32
+llvm.mlir.global internal @nohost1() : i32
+llvm.func @omp_groupprivate_host() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.addressof @any1 : !llvm.ptr
+  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+  llvm.store %0, %2 : i32, !llvm.ptr
+
+  %3 = llvm.mlir.addressof @host1 : !llvm.ptr
+  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+  llvm.store %0, %4 : i32, !llvm.ptr
+
+  %5 = llvm.mlir.addressof @nohost1 : !llvm.ptr
+  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  llvm.store %0, %6 : i32, !llvm.ptr
+  llvm.return
+}
+}
+
+// CHECK: @any1 = internal global i32 undef
+// CHECK: @host1 = internal global i32 undef
+// CHECK: @nohost1 = internal global i32 undef
+// CHECK-LABEL: @omp_groupprivate_host
+// CHECK:  [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK:  store i32 1, ptr [[TMP1]], align 4
+// CHECK:  [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
+// CHECK:  store i32 1, ptr [[TMP2]], align 4
+// CHECK:  store i32 1, ptr @nohost1, align 4
+
+// -----

>From 051d2d3ca01634826aeb858159db4a42a9fa7091 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Sun, 9 Nov 2025 09:14:30 +0530
Subject: [PATCH 02/10] Use getGlobalFromSymbol for threadprivate and
 groupprivate

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 50 ++++++++++++-------
 .../Target/LLVMIR/omptarget-groupprivate.mlir |  2 +-
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6cc1658152d18..93ec037723ac4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4734,6 +4734,26 @@ convertOmpCancellationPoint(omp::CancellationPointOp op,
   return success();
 }
 
+static LLVM::GlobalOp
+getGlobalFromSymbol(Operation *symOp,
+                    LLVM::ModuleTranslation &moduleTranslation,
+                    Operation *opInst) {
+
+  // Handle potential address space cast
+  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
+    symOp = asCast.getOperand().getDefiningOp();
+
+  // Check if we have an AddressOfOp
+  if (!isa<LLVM::AddressOfOp>(symOp)) {
+    if (opInst)
+      opInst->emitError("Addressing symbol not found");
+    return nullptr;
+  }
+
+  LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
+  return addressOfOp.getGlobal(moduleTranslation.symbolTable());
+}
+
 /// Converts an OpenMP Threadprivate operation into LLVM IR using
 /// OpenMPIRBuilder.
 static LogicalResult
@@ -4749,15 +4769,10 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   Value symAddr = threadprivateOp.getSymAddr();
   auto *symOp = symAddr.getDefiningOp();
 
-  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
-    symOp = asCast.getOperand().getDefiningOp();
-
-  if (!isa<LLVM::AddressOfOp>(symOp))
-    return opInst.emitError("Addressing symbol not found");
-  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
-
   LLVM::GlobalOp global =
-      addressOfOp.getGlobal(moduleTranslation.symbolTable());
+      getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
+  if (!global)
+    return failure();
   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
   llvm::Type *type = globalValue->getValueType();
   llvm::TypeSize typeSize =
@@ -7886,17 +7901,13 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   }
 
   Value symAddr = groupprivateOp.getSymAddr();
-  auto *symOp = symAddr.getDefiningOp();
-
-  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
-    symOp = asCast.getOperand().getDefiningOp();
-
-  if (!isa<LLVM::AddressOfOp>(symOp))
-    return opInst.emitError("Addressing symbol not found");
-  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
+  Operation *symOp = symAddr.getDefiningOp();
 
   LLVM::GlobalOp global =
-      addressOfOp.getGlobal(moduleTranslation.symbolTable());
+      getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
+  if (!global)
+    return failure();
+
   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
   llvm::Value *resultPtr;
 
@@ -7917,6 +7928,11 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
     if (!resultPtr)
       resultPtr = globalValue;
   }
+
+  llvm::Type *ptrTy = builder.getPtrTy();
+  if (resultPtr->getType() != ptrTy)
+    resultPtr = builder.CreateBitCast(resultPtr, ptrTy);
+
   moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
   return success();
 }
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
index 46e9639adcc06..f6b37e6446fe7 100644
--- a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -33,7 +33,7 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
 
 // CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
 // CHECK-LABEL:  omp.target:
-// CHECK-NEXT :    %[[LOAD:.*]] = load i32, ptr %3, align 4
+// CHECK-NEXT :    %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
 // CHECK-NEXT :    %[[ALLOC_any:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
 // CHECK-NEXT :    store i32 %[[LOAD]], ptr %[[ALLOC_any]], align 4
 // CHECK-NEXT :    store i32 %[[LOAD]], ptr @global_host, align 4

>From 1ea3653a4460995a55b43b1c6cb563c308461de3 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 28 Nov 2025 21:28:56 +0530
Subject: [PATCH 03/10] Remove bitcast for resultPtr

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp       | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 93ec037723ac4..b8516c8672e4e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7929,10 +7929,6 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
       resultPtr = globalValue;
   }
 
-  llvm::Type *ptrTy = builder.getPtrTy();
-  if (resultPtr->getType() != ptrTy)
-    resultPtr = builder.CreateBitCast(resultPtr, ptrTy);
-
   moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
   return success();
 }

>From 7d3dd874c9110a5460e94a45607d6ef3aa247ec6 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 1 Dec 2025 12:41:50 +0530
Subject: [PATCH 04/10] Use llvm addrspace(3) globals for amdgpu and nvptx
 groupprivate lowering

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 40 +++++++----
 .../Target/LLVMIR/omptarget-groupprivate.mlir | 12 ++--
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 71 +++++--------------
 3 files changed, 53 insertions(+), 70 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index b8516c8672e4e..5c81af1616ba2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7912,21 +7912,33 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::Value *resultPtr;
 
   if (shouldAllocate) {
-    // Get the size of the variable
-    llvm::Type *varType = globalValue->getValueType();
-    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
-    llvm::DataLayout DL = llvmModule->getDataLayout();
-    uint64_t typeSize = DL.getTypeAllocSize(varType);
-    // Call omp_alloc_shared to allocate memory for groupprivate variable.
-    llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
-        *llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
-    // Call runtime to allocate shared memory for this group
-    resultPtr = builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
-  } else {
-    // Use original global address when not allocating group-private storage
-    resultPtr = moduleTranslation.lookupValue(symAddr);
-    if (!resultPtr)
+    if (isTargetDevice) {
+      // Get the size of the variable
+      llvm::Type *varType = globalValue->getValueType();
+      llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+      // Create a llvm global variable in shared memory
+      llvm::Triple targetTriple = llvm::Triple(llvmModule->getTargetTriple());
+      if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
+        // Shared address space is 3 for amdgpu and nvptx targets.
+        unsigned sharedAddressSpace = 3;
+        llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
+            *llvmModule, varType, false, llvm::GlobalValue::InternalLinkage,
+            llvm::PoisonValue::get(varType), globalValue->getName(), nullptr,
+            llvm::GlobalValue::NotThreadLocal, sharedAddressSpace, false);
+        resultPtr = sharedVar;
+      } else {
+        return opInst.emitError()
+               << "Groupprivate operation is not supported for this target: "
+               << targetTriple.str();
+      }
+    } else {
+      // Use original global address when allocating on host device.
+      // TODO: Add support for allocating group-private storage on host device.
       resultPtr = globalValue;
+    }
+  } else {
+    // Use original global address when not allocating group-private storage.
+    resultPtr = globalValue;
   }
 
   moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
index f6b37e6446fe7..bdbe6d11d9957 100644
--- a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -31,11 +31,15 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
   llvm.mlir.global internal @global_nohost() : i32
 }
 
+// CHECK-DAG: @global_a = internal global i32 undef
+// CHECK-DAG: @global_any = internal global i32 undef
+// CHECK-DAG: @global_host = internal global i32 undef
+// CHECK-DAG: @global_nohost = internal global i32 undef
+// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
+// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
 // CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
 // CHECK-LABEL:  omp.target:
 // CHECK-NEXT :    %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
-// CHECK-NEXT :    %[[ALLOC_any:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK-NEXT :    store i32 %[[LOAD]], ptr %[[ALLOC_any]], align 4
+// CHECK-NEXT :    store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
 // CHECK-NEXT :    store i32 %[[LOAD]], ptr @global_host, align 4
-// CHECK-NEXT :    %[[ALLOC_NOHOST:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK-NEXT :    store i32 %[[LOAD]], ptr %[[ALLOC_NOHOST]], align 4
+// CHECK-NEXT :    store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index fb27d710da11f..b05c3645c052f 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3626,40 +3626,7 @@ llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
 
 // -----
 
-module attributes {omp.is_target_device = false} {
-llvm.mlir.global internal @any() : i32
-llvm.mlir.global internal @host() : i32
-llvm.mlir.global internal @nohost() : i32
-llvm.func @omp_groupprivate_host() {
-  %0 = llvm.mlir.constant(1 : i32) : i32
-  %1 = llvm.mlir.addressof @any : !llvm.ptr
-  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
-  llvm.store %0, %2 : i32, !llvm.ptr
-
-  %3 = llvm.mlir.addressof @host : !llvm.ptr
-  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
-  llvm.store %0, %4 : i32, !llvm.ptr
-
-  %5 = llvm.mlir.addressof @nohost : !llvm.ptr
-  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
-  llvm.store %0, %6 : i32, !llvm.ptr
-  llvm.return
-}
-}
-
-// CHECK: @any = internal global i32 undef
-// CHECK: @host = internal global i32 undef
-// CHECK: @nohost = internal global i32 undef
-// CHECK-LABEL: @omp_groupprivate_host
-// CHECK:  [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK:  store i32 1, ptr [[TMP1]], align 4
-// CHECK:  [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK:  store i32 1, ptr [[TMP2]], align 4
-// CHECK:  store i32 1, ptr @nohost, align 4
-
-// -----
-
-module attributes {omp.is_target_device = true} {
+module attributes {omp.is_target_device = true, llvm.target_triple = "nvptx64-nvidia-cuda"} {
 llvm.mlir.global internal @any() : i32
 llvm.mlir.global internal @host() : i32
 llvm.mlir.global internal @nohost() : i32
@@ -3680,15 +3647,16 @@ llvm.func @omp_groupprivate_device() {
 }
 }
 
-// CHECK: @any = internal global i32 undef
-// CHECK: @host = internal global i32 undef
-// CHECK: @nohost = internal global i32 undef
-// CHECK-LABEL: @omp_groupprivate_device
-// CHECK:  [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK:  store i32 1, ptr [[TMP1]], align 4
-// CHECK:  store i32 1, ptr @host, align 4
-// CHECK:  [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK:  store i32 1, ptr [[TMP2]], align 4
+// CHECK-DAG: @any = internal global i32 undef
+// CHECK-DAG: @host = internal global i32 undef
+// CHECK-DAG: @nohost = internal global i32 undef
+// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
+// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
+// CHECK-LABEL: define void @omp_groupprivate_device()
+// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
+// CHECK: store i32 1, ptr @host, align 4
+// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
+// CHECK: ret void
 
 // -----
 
@@ -3713,14 +3681,13 @@ llvm.func @omp_groupprivate_host() {
 }
 }
 
-// CHECK: @any1 = internal global i32 undef
-// CHECK: @host1 = internal global i32 undef
-// CHECK: @nohost1 = internal global i32 undef
-// CHECK-LABEL: @omp_groupprivate_host
-// CHECK:  [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK:  store i32 1, ptr [[TMP1]], align 4
-// CHECK:  [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
-// CHECK:  store i32 1, ptr [[TMP2]], align 4
-// CHECK:  store i32 1, ptr @nohost1, align 4
+// CHECK-DAG: @any1 = internal global i32 undef
+// CHECK-DAG: @host1 = internal global i32 undef
+// CHECK-DAG: @nohost1 = internal global i32 undef
+// CHECK-LABEL: define void @omp_groupprivate_host()
+// CHECK: store i32 1, ptr @any1, align 4
+// CHECK: store i32 1, ptr @host1, align 4
+// CHECK: store i32 1, ptr @nohost1, align 4
+// CHECK: ret void
 
 // -----

>From a54ab432a171b5216a1b6f5a691016b7605c8682 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 12 Jan 2026 19:55:27 +0530
Subject: [PATCH 05/10] Fix the llvmir translation by getting valid type from
 global

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 ++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 57 ++++++++++++-------
 2 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5bc9cbee2561a..2ef65708e17fa 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2453,12 +2453,15 @@ def GroupprivateOp : OpenMP_Op<"groupprivate",
     each group having its own copy.
 
     This operation takes in the address of a symbol that represents the original
-    variable, optional DeviceTypeAttr and returns the address of its groupprivate copy.
-    All occurrences of groupprivate variables in a parallel region should
-    use the groupprivate copy returned by this operation.
+    variable and returns the address of its groupprivate copy. The symbol must
+    refer to a global variable so that type information can be obtained from it.
 
     The `sym_addr` refers to the address of the symbol, which is a pointer to
-    the original variable.
+    the original variable. It must be obtained via `llvm.mlir.addressof` from
+    a global variable.
+
+    The optional `device_type` attribute specifies where the groupprivate
+    storage should be allocated (host, nohost, or any).
   }];
 
   let arguments = (ins
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 5c81af1616ba2..835e499fd69db 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7881,7 +7881,7 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   bool isTargetDevice = ompBuilder->Config.isTargetDevice();
   auto deviceType = groupprivateOp.getDeviceType();
 
-  // skip allocation based on device_type
+  // Skip allocation based on device_type
   bool shouldAllocate = true;
   if (deviceType.has_value()) {
     switch (*deviceType) {
@@ -7901,30 +7901,45 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   }
 
   Value symAddr = groupprivateOp.getSymAddr();
-  Operation *symOp = symAddr.getDefiningOp();
+  llvm::Value *symValue = moduleTranslation.lookupValue(symAddr);
+  llvm::Value *resultPtr;
 
-  LLVM::GlobalOp global =
-      getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
-  if (!global)
-    return failure();
+  // Get the element type and variable name from the global.
+  // Groupprivate requires sym_addr to come from a global variable.
+  llvm::Type *varType = nullptr;
+  std::string varName = "omp.groupprivate";
+
+  if (Operation *symOp = symAddr.getDefiningOp()) {
+    if (LLVM::GlobalOp global =
+            getGlobalFromSymbol(symOp, moduleTranslation, nullptr)) {
+      // Get type from the global
+      varType = moduleTranslation.convertType(global.getType());
+      // Get name from the global
+      if (llvm::GlobalValue *globalValue =
+              moduleTranslation.lookupGlobal(global)) {
+        varName = globalValue->getName().str();
+      }
+    }
+  }
 
-  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
-  llvm::Value *resultPtr;
+  if (!varType) {
+    return opInst.emitError()
+           << "Groupprivate requires sym_addr to reference a global variable";
+  }
 
   if (shouldAllocate) {
     if (isTargetDevice) {
-      // Get the size of the variable
-      llvm::Type *varType = globalValue->getValueType();
       llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
-      // Create a llvm global variable in shared memory
       llvm::Triple targetTriple = llvm::Triple(llvmModule->getTargetTriple());
       if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
-        // Shared address space is 3 for amdgpu and nvptx targets.
+        // Shared address space is 3 for AMDGPU and NVPTX targets.
         unsigned sharedAddressSpace = 3;
         llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
-            *llvmModule, varType, false, llvm::GlobalValue::InternalLinkage,
-            llvm::PoisonValue::get(varType), globalValue->getName(), nullptr,
-            llvm::GlobalValue::NotThreadLocal, sharedAddressSpace, false);
+            *llvmModule, varType, /*isConstant=*/false,
+            llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
+            varName, /*InsertBefore=*/nullptr,
+            llvm::GlobalValue::NotThreadLocal, sharedAddressSpace,
+            /*isExternallyInitialized=*/false);
         resultPtr = sharedVar;
       } else {
         return opInst.emitError()
@@ -7932,13 +7947,13 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
                << targetTriple.str();
       }
     } else {
-      // Use original global address when allocating on host device.
+      // Use original address when allocating on host device.
       // TODO: Add support for allocating group-private storage on host device.
-      resultPtr = globalValue;
+      resultPtr = symValue;
     }
   } else {
-    // Use original global address when not allocating group-private storage.
-    resultPtr = globalValue;
+    // Use original address when not allocating group-private storage.
+    resultPtr = symValue;
   }
 
   moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
@@ -7953,8 +7968,8 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
   if (ompBuilder->Config.isTargetDevice() &&
-      !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp>(
-          op) &&
+      !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp,
+           omp::GroupprivateOp>(op) &&
       isHostDeviceOp(op))
     return op->emitOpError() << "unsupported host op found in device";
 

>From 842c1ff3760cd5cf0b09201842ec6acfb9414efd Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 8 Apr 2026 15:03:17 +0530
Subject: [PATCH 06/10] Change omp.groupprivate to take a symbol reference

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  18 +--
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  17 +++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 142 +++++++-----------
 mlir/test/Dialect/OpenMP/ops.mlir             |  29 ++--
 .../Target/LLVMIR/omptarget-groupprivate.mlir |   9 +-
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  21 +--
 6 files changed, 97 insertions(+), 139 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 2ef65708e17fa..f37d5d80045b1 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2445,33 +2445,29 @@ def IteratorOp : OpenMP_Op<"iterator",
 // [6.0] groupprivate Directive
 //===----------------------------------------------------------------------===//
 
-def GroupprivateOp : OpenMP_Op<"groupprivate",
-                      [AllTypesMatch<["sym_addr", "gp_addr"]>]> {
+def GroupprivateOp : OpenMP_Op<"groupprivate", [Pure]> {
   let summary = "groupprivate directive";
   let description = [{
     The groupprivate directive specifies that variables are replicated, with
     each group having its own copy.
 
-    This operation takes in the address of a symbol that represents the original
-    variable and returns the address of its groupprivate copy. The symbol must
-    refer to a global variable so that type information can be obtained from it.
-
-    The `sym_addr` refers to the address of the symbol, which is a pointer to
-    the original variable. It must be obtained via `llvm.mlir.addressof` from
-    a global variable.
+    This operation takes a symbol reference to a global variable and returns
+    the address of its groupprivate copy. The referenced symbol must exist and
+    must not be a function.
 
     The optional `device_type` attribute specifies where the groupprivate
     storage should be allocated (host, nohost, or any).
   }];
 
   let arguments = (ins
-    OpenMP_PointerLikeType:$sym_addr,
+    FlatSymbolRefAttr:$sym_name,
     OptionalAttr<DeclareTargetDeviceTypeAttr>:$device_type
   );
   let results = (outs OpenMP_PointerLikeType:$gp_addr);
   let assemblyFormat = [{
-    $sym_addr `:` type($sym_addr) ( `,` `device_type` $device_type^ )? `->` type($gp_addr) attr-dict
+    $sym_name `:` type($gp_addr) (`,` `device_type` $device_type^)? attr-dict
   }];
+  let hasVerifier = 1;
 }
 
 #endif // OPENMP_OPS
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 691793b50d33d..50c5bc0e23f69 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -5054,6 +5054,23 @@ LogicalResult IteratorOp::verify() {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// GroupprivateOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult GroupprivateOp::verify() {
+  auto *symbol = SymbolTable::lookupNearestSymbolFrom(*this, getSymNameAttr());
+  if (!symbol)
+    return emitOpError() << "expected symbol reference '" << getSymName()
+                         << "' to point to a global variable";
+
+  if (isa<FunctionOpInterface>(symbol))
+    return emitOpError() << "expected symbol reference '" << getSymName()
+                         << "' to point to a global variable, not a function";
+
+  return success();
+}
+
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 835e499fd69db..73c60a4606f2f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4734,26 +4734,6 @@ convertOmpCancellationPoint(omp::CancellationPointOp op,
   return success();
 }
 
-static LLVM::GlobalOp
-getGlobalFromSymbol(Operation *symOp,
-                    LLVM::ModuleTranslation &moduleTranslation,
-                    Operation *opInst) {
-
-  // Handle potential address space cast
-  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
-    symOp = asCast.getOperand().getDefiningOp();
-
-  // Check if we have an AddressOfOp
-  if (!isa<LLVM::AddressOfOp>(symOp)) {
-    if (opInst)
-      opInst->emitError("Addressing symbol not found");
-    return nullptr;
-  }
-
-  LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
-  return addressOfOp.getGlobal(moduleTranslation.symbolTable());
-}
-
 /// Converts an OpenMP Threadprivate operation into LLVM IR using
 /// OpenMPIRBuilder.
 static LogicalResult
@@ -4769,8 +4749,15 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   Value symAddr = threadprivateOp.getSymAddr();
   auto *symOp = symAddr.getDefiningOp();
 
+  if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
+    symOp = asCast.getOperand().getDefiningOp();
+
+  if (!isa<LLVM::AddressOfOp>(symOp))
+    return opInst.emitError("Addressing symbol not found");
+
+  LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
   LLVM::GlobalOp global =
-      getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
+      addressOfOp.getGlobal(moduleTranslation.symbolTable());
   if (!global)
     return failure();
   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
@@ -7868,7 +7855,7 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
-/// Converts an OpenMP Groupprivate operation into LLVM IR.
+/// Converts an OpenMP groupprivate operation into LLVM IR.
 static LogicalResult
 convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
@@ -7879,81 +7866,56 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
     return failure();
 
   bool isTargetDevice = ompBuilder->Config.isTargetDevice();
-  auto deviceType = groupprivateOp.getDeviceType();
 
-  // Skip allocation based on device_type
+  // Determine whether group-private storage should be allocated based on
+  // device_type. When not specified, default to 'any' (allocate on both).
   bool shouldAllocate = true;
-  if (deviceType.has_value()) {
-    switch (*deviceType) {
-    case mlir::omp::DeclareTargetDeviceType::host:
-      // Only allocate on host
-      shouldAllocate = !isTargetDevice;
-      break;
-    case mlir::omp::DeclareTargetDeviceType::nohost:
-      // Only allocate on device
-      shouldAllocate = isTargetDevice;
-      break;
-    case mlir::omp::DeclareTargetDeviceType::any:
-      // Allocate on both
-      shouldAllocate = true;
-      break;
-    }
+  switch (groupprivateOp.getDeviceType().value_or(
+      mlir::omp::DeclareTargetDeviceType::any)) {
+  case mlir::omp::DeclareTargetDeviceType::host:
+    shouldAllocate = !isTargetDevice;
+    break;
+  case mlir::omp::DeclareTargetDeviceType::nohost:
+    shouldAllocate = isTargetDevice;
+    break;
+  case mlir::omp::DeclareTargetDeviceType::any:
+    shouldAllocate = true;
+    break;
   }
 
-  Value symAddr = groupprivateOp.getSymAddr();
-  llvm::Value *symValue = moduleTranslation.lookupValue(symAddr);
-  llvm::Value *resultPtr;
+  // Look up the global variable directly by symbol name.
+  LLVM::GlobalOp global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
+      &opInst, groupprivateOp.getSymNameAttr());
+  if (!global)
+    return opInst.emitError()
+           << "expected symbol '" << groupprivateOp.getSymName()
+           << "' to reference an LLVM global variable";
 
-  // Get the element type and variable name from the global.
-  // Groupprivate requires sym_addr to come from a global variable.
-  llvm::Type *varType = nullptr;
-  std::string varName = "omp.groupprivate";
-
-  if (Operation *symOp = symAddr.getDefiningOp()) {
-    if (LLVM::GlobalOp global =
-            getGlobalFromSymbol(symOp, moduleTranslation, nullptr)) {
-      // Get type from the global
-      varType = moduleTranslation.convertType(global.getType());
-      // Get name from the global
-      if (llvm::GlobalValue *globalValue =
-              moduleTranslation.lookupGlobal(global)) {
-        varName = globalValue->getName().str();
-      }
-    }
-  }
+  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
+  llvm::Type *varType = moduleTranslation.convertType(global.getType());
+  std::string varName = globalValue->getName().str();
 
-  if (!varType) {
-    return opInst.emitError()
-           << "Groupprivate requires sym_addr to reference a global variable";
-  }
-
-  if (shouldAllocate) {
-    if (isTargetDevice) {
-      llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
-      llvm::Triple targetTriple = llvm::Triple(llvmModule->getTargetTriple());
-      if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
-        // Shared address space is 3 for AMDGPU and NVPTX targets.
-        unsigned sharedAddressSpace = 3;
-        llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
-            *llvmModule, varType, /*isConstant=*/false,
-            llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
-            varName, /*InsertBefore=*/nullptr,
-            llvm::GlobalValue::NotThreadLocal, sharedAddressSpace,
-            /*isExternallyInitialized=*/false);
-        resultPtr = sharedVar;
-      } else {
-        return opInst.emitError()
-               << "Groupprivate operation is not supported for this target: "
-               << targetTriple.str();
-      }
+  llvm::Value *resultPtr;
+  if (shouldAllocate && isTargetDevice) {
+    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+    llvm::Triple targetTriple(llvmModule->getTargetTriple());
+    if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
+      unsigned sharedAddressSpace = 3;
+      llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
+          *llvmModule, varType, /*isConstant=*/false,
+          llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
+          varName, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+          sharedAddressSpace,
+          /*isExternallyInitialized=*/false);
+      resultPtr = sharedVar;
     } else {
-      // Use original address when allocating on host device.
-      // TODO: Add support for allocating group-private storage on host device.
-      resultPtr = symValue;
+      return opInst.emitError() << "groupprivate is not supported for target: "
+                                << targetTriple.str();
     }
   } else {
-    // Use original address when not allocating group-private storage.
-    resultPtr = symValue;
+    // Use original global address on host or when not allocating
+    // group-private storage.
+    resultPtr = globalValue;
   }
 
   moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
@@ -7968,8 +7930,8 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
   if (ompBuilder->Config.isTargetDevice() &&
-      !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp,
-           omp::GroupprivateOp>(op) &&
+      !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp>(
+          op) &&
       isHostDeviceOp(op))
     return op->emitOpError() << "unsupported host op found in device";
 
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index a4d5e1b77447a..33ef9b327e6d6 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3908,30 +3908,21 @@ func.func @omp_task_affinity_iterator_2d(%lb0 : index, %ub0 : index, %st0 : inde
 
 // CHECK-LABEL: func.func @omp_groupprivate_device_type
 func.func @omp_groupprivate_device_type() {
-  %0 = arith.constant 1 : i32
   %1 = arith.constant 2 : i32
-  // CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @gp : !llvm.ptr
-  %gp_addr = llvm.mlir.addressof @gp : !llvm.ptr
-  // CHECK: [[ARG1:%.*]] = llvm.mlir.addressof @any : !llvm.ptr
-  %any_addr = llvm.mlir.addressof @any : !llvm.ptr
-  // CHECK: [[ARG2:%.*]] = llvm.mlir.addressof @host : !llvm.ptr
-  %host_addr = llvm.mlir.addressof @host : !llvm.ptr
-  // CHECK: [[ARG3:%.*]] = llvm.mlir.addressof @nohost : !llvm.ptr
-  %nohost_addr = llvm.mlir.addressof @nohost : !llvm.ptr
-
-  // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
-  %group_private_addr = omp.groupprivate %gp_addr : !llvm.ptr -> !llvm.ptr
-
-  // CHECK: {{.*}} = omp.groupprivate [[ARG1]] : !llvm.ptr, device_type (any) -> !llvm.ptr
-  %group_private_any = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate @gp : !llvm.ptr
+  %group_private_addr = omp.groupprivate @gp : !llvm.ptr
+
+  // CHECK: {{.*}} = omp.groupprivate @any : !llvm.ptr, device_type (any)
+  %group_private_any = omp.groupprivate @any : !llvm.ptr, device_type(any)
   llvm.store %1, %group_private_any : i32, !llvm.ptr
 
-  // CHECK: {{.*}} = omp.groupprivate [[ARG2]] : !llvm.ptr, device_type (host) -> !llvm.ptr
-  %group_private_host = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+  // CHECK: {{.*}} = omp.groupprivate @host : !llvm.ptr, device_type (host)
+  %group_private_host = omp.groupprivate @host : !llvm.ptr, device_type(host)
   llvm.store %1, %group_private_host : i32, !llvm.ptr
 
-  // CHECK: {{.*}} = omp.groupprivate [[ARG3]] : !llvm.ptr, device_type (nohost) -> !llvm.ptr
-  %group_private_nohost = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  // CHECK: {{.*}} = omp.groupprivate @nohost : !llvm.ptr, device_type (nohost)
+  %group_private_nohost = omp.groupprivate @nohost : !llvm.ptr, device_type(nohost)
   llvm.store %1, %group_private_nohost : i32, !llvm.ptr
 
   return
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
index bdbe6d11d9957..132e7b8d14242 100644
--- a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -9,16 +9,13 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
     omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
       %loaded = llvm.load %arg1 : !llvm.ptr -> i32
 
-      %any_addr = llvm.mlir.addressof @global_any : !llvm.ptr
-      %any_gp = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
+      %any_gp = omp.groupprivate @global_any : !llvm.ptr, device_type(any)
       llvm.store %loaded, %any_gp : i32, !llvm.ptr
 
-      %host_addr = llvm.mlir.addressof @global_host : !llvm.ptr
-      %host_gp = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
+      %host_gp = omp.groupprivate @global_host : !llvm.ptr, device_type(host)
       llvm.store %loaded, %host_gp : i32, !llvm.ptr
 
-      %nohost_addr = llvm.mlir.addressof @global_nohost : !llvm.ptr
-      %nohost_gp = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+      %nohost_gp = omp.groupprivate @global_nohost : !llvm.ptr, device_type(nohost)
       llvm.store %loaded, %nohost_gp : i32, !llvm.ptr
 
       omp.terminator
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index b05c3645c052f..f4b494b9e99bf 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3630,18 +3630,16 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "nvptx64-nv
 llvm.mlir.global internal @any() : i32
 llvm.mlir.global internal @host() : i32
 llvm.mlir.global internal @nohost() : i32
-llvm.func @omp_groupprivate_device() {
+llvm.func @omp_groupprivate_device() attributes {
+    omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
   %0 = llvm.mlir.constant(1 : i32) : i32
-  %1 = llvm.mlir.addressof @any : !llvm.ptr
-  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+  %2 = omp.groupprivate @any : !llvm.ptr, device_type(any)
   llvm.store %0, %2 : i32, !llvm.ptr
 
-  %3 = llvm.mlir.addressof @host : !llvm.ptr
-  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+  %4 = omp.groupprivate @host : !llvm.ptr, device_type(host)
   llvm.store %0, %4 : i32, !llvm.ptr
 
-  %5 = llvm.mlir.addressof @nohost : !llvm.ptr
-  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  %6 = omp.groupprivate @nohost : !llvm.ptr, device_type(nohost)
   llvm.store %0, %6 : i32, !llvm.ptr
   llvm.return
 }
@@ -3666,16 +3664,13 @@ llvm.mlir.global internal @host1() : i32
 llvm.mlir.global internal @nohost1() : i32
 llvm.func @omp_groupprivate_host() {
   %0 = llvm.mlir.constant(1 : i32) : i32
-  %1 = llvm.mlir.addressof @any1 : !llvm.ptr
-  %2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
+  %2 = omp.groupprivate @any1 : !llvm.ptr, device_type(any)
   llvm.store %0, %2 : i32, !llvm.ptr
 
-  %3 = llvm.mlir.addressof @host1 : !llvm.ptr
-  %4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
+  %4 = omp.groupprivate @host1 : !llvm.ptr, device_type(host)
   llvm.store %0, %4 : i32, !llvm.ptr
 
-  %5 = llvm.mlir.addressof @nohost1 : !llvm.ptr
-  %6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
+  %6 = omp.groupprivate @nohost1 : !llvm.ptr, device_type(nohost)
   llvm.store %0, %6 : i32, !llvm.ptr
   llvm.return
 }

>From 8a635316a87c519236463bc0cc5ee7b687eb3610 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 8 Apr 2026 15:06:33 +0530
Subject: [PATCH 07/10] Remove threadprivate changes

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 73c60a4606f2f..95886658c653b 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4754,12 +4754,9 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
 
   if (!isa<LLVM::AddressOfOp>(symOp))
     return opInst.emitError("Addressing symbol not found");
-
   LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
   LLVM::GlobalOp global =
       addressOfOp.getGlobal(moduleTranslation.symbolTable());
-  if (!global)
-    return failure();
   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
   llvm::Type *type = globalValue->getValueType();
   llvm::TypeSize typeSize =

>From e341bce1cfb31f5be6ea6e94115b8e4b3e466553 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 8 Apr 2026 15:08:19 +0530
Subject: [PATCH 08/10] fix threadprivate

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 95886658c653b..dd62b2adced02 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4754,7 +4754,8 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
 
   if (!isa<LLVM::AddressOfOp>(symOp))
     return opInst.emitError("Addressing symbol not found");
-  LLVM::AddressOfOp addressOfOp = cast<LLVM::AddressOfOp>(symOp);
+  LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
+
   LLVM::GlobalOp global =
       addressOfOp.getGlobal(moduleTranslation.symbolTable());
   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);

>From 712812feb2dfdbca6dd3feba45b63c88c6bfa6b2 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Tue, 14 Apr 2026 16:22:54 +0530
Subject: [PATCH 09/10] update

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  6 ++---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  5 ++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 26 +++++++++++--------
 mlir/test/Dialect/OpenMP/ops.mlir             | 12 ++++-----
 .../Target/LLVMIR/omptarget-groupprivate.mlir | 19 +++++++-------
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 22 ++++++++--------
 6 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index f37d5d80045b1..2c155be75c673 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -2445,7 +2445,8 @@ def IteratorOp : OpenMP_Op<"iterator",
 // [6.0] groupprivate Directive
 //===----------------------------------------------------------------------===//
 
-def GroupprivateOp : OpenMP_Op<"groupprivate", [Pure]> {
+def GroupprivateOp : OpenMP_Op<"groupprivate",
+    [Pure, DeclareOpInterfaceMethods<SymbolUserOpInterface>]> {
   let summary = "groupprivate directive";
   let description = [{
     The groupprivate directive specifies that variables are replicated, with
@@ -2465,9 +2466,8 @@ def GroupprivateOp : OpenMP_Op<"groupprivate", [Pure]> {
   );
   let results = (outs OpenMP_PointerLikeType:$gp_addr);
   let assemblyFormat = [{
-    $sym_name `:` type($gp_addr) (`,` `device_type` $device_type^)? attr-dict
+    $sym_name (`device_type` $device_type^)? `:` type($gp_addr) attr-dict
   }];
-  let hasVerifier = 1;
 }
 
 #endif // OPENMP_OPS
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 50c5bc0e23f69..ca7766192d030 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -5058,8 +5058,9 @@ LogicalResult IteratorOp::verify() {
 // GroupprivateOp
 //===----------------------------------------------------------------------===//
 
-LogicalResult GroupprivateOp::verify() {
-  auto *symbol = SymbolTable::lookupNearestSymbolFrom(*this, getSymNameAttr());
+LogicalResult
+GroupprivateOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
+  auto *symbol = symbolTable.lookupNearestSymbolFrom(*this, getSymNameAttr());
   if (!symbol)
     return emitOpError() << "expected symbol reference '" << getSymName()
                          << "' to point to a global variable";
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index dd62b2adced02..092c65273ddb2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -33,7 +33,9 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/ReplaceConstant.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -7897,19 +7899,21 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   if (shouldAllocate && isTargetDevice) {
     llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
     llvm::Triple targetTriple(llvmModule->getTargetTriple());
-    if (targetTriple.isAMDGCN() || targetTriple.isNVPTX()) {
-      unsigned sharedAddressSpace = 3;
-      llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
-          *llvmModule, varType, /*isConstant=*/false,
-          llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
-          varName, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
-          sharedAddressSpace,
-          /*isExternallyInitialized=*/false);
-      resultPtr = sharedVar;
-    } else {
+    unsigned sharedAddressSpace;
+    if (targetTriple.isAMDGCN())
+      sharedAddressSpace = llvm::AMDGPUAS::LOCAL_ADDRESS;
+    else if (targetTriple.isNVPTX())
+      sharedAddressSpace = llvm::NVPTXAS::ADDRESS_SPACE_SHARED;
+    else
       return opInst.emitError() << "groupprivate is not supported for target: "
                                 << targetTriple.str();
-    }
+    llvm::GlobalVariable *sharedVar = new llvm::GlobalVariable(
+        *llvmModule, varType, /*isConstant=*/false,
+        llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
+        varName, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+        sharedAddressSpace,
+        /*isExternallyInitialized=*/false);
+    resultPtr = sharedVar;
   } else {
     // Use original global address on host or when not allocating
     // group-private storage.
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 33ef9b327e6d6..30ce476b717d6 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -3913,16 +3913,16 @@ func.func @omp_groupprivate_device_type() {
   // CHECK: {{.*}} = omp.groupprivate @gp : !llvm.ptr
   %group_private_addr = omp.groupprivate @gp : !llvm.ptr
 
-  // CHECK: {{.*}} = omp.groupprivate @any : !llvm.ptr, device_type (any)
-  %group_private_any = omp.groupprivate @any : !llvm.ptr, device_type(any)
+  // CHECK: {{.*}} = omp.groupprivate @any device_type (any) : !llvm.ptr
+  %group_private_any = omp.groupprivate @any device_type(any) : !llvm.ptr
   llvm.store %1, %group_private_any : i32, !llvm.ptr
 
-  // CHECK: {{.*}} = omp.groupprivate @host : !llvm.ptr, device_type (host)
-  %group_private_host = omp.groupprivate @host : !llvm.ptr, device_type(host)
+  // CHECK: {{.*}} = omp.groupprivate @host device_type (host) : !llvm.ptr
+  %group_private_host = omp.groupprivate @host device_type(host) : !llvm.ptr
   llvm.store %1, %group_private_host : i32, !llvm.ptr
 
-  // CHECK: {{.*}} = omp.groupprivate @nohost : !llvm.ptr, device_type (nohost)
-  %group_private_nohost = omp.groupprivate @nohost : !llvm.ptr, device_type(nohost)
+  // CHECK: {{.*}} = omp.groupprivate @nohost device_type (nohost) : !llvm.ptr
+  %group_private_nohost = omp.groupprivate @nohost device_type(nohost) : !llvm.ptr
   llvm.store %1, %group_private_nohost : i32, !llvm.ptr
 
   return
diff --git a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
index 132e7b8d14242..f064996427d96 100644
--- a/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
@@ -9,13 +9,13 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
     omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
       %loaded = llvm.load %arg1 : !llvm.ptr -> i32
 
-      %any_gp = omp.groupprivate @global_any : !llvm.ptr, device_type(any)
+      %any_gp = omp.groupprivate @global_any device_type(any) : !llvm.ptr
       llvm.store %loaded, %any_gp : i32, !llvm.ptr
 
-      %host_gp = omp.groupprivate @global_host : !llvm.ptr, device_type(host)
+      %host_gp = omp.groupprivate @global_host device_type(host) : !llvm.ptr
       llvm.store %loaded, %host_gp : i32, !llvm.ptr
 
-      %nohost_gp = omp.groupprivate @global_nohost : !llvm.ptr, device_type(nohost)
+      %nohost_gp = omp.groupprivate @global_nohost device_type(nohost) : !llvm.ptr
       llvm.store %loaded, %nohost_gp : i32, !llvm.ptr
 
       omp.terminator
@@ -32,11 +32,10 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
 // CHECK-DAG: @global_any = internal global i32 undef
 // CHECK-DAG: @global_host = internal global i32 undef
 // CHECK-DAG: @global_nohost = internal global i32 undef
-// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
-// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
+// CHECK-DAG: @[[SHARED_ANY:global_any.*]] = internal addrspace(3) global i32 poison
+// CHECK-DAG: @[[SHARED_NOHOST:global_nohost.*]] = internal addrspace(3) global i32 poison
 // CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
-// CHECK-LABEL:  omp.target:
-// CHECK-NEXT :    %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
-// CHECK-NEXT :    store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
-// CHECK-NEXT :    store i32 %[[LOAD]], ptr @global_host, align 4
-// CHECK-NEXT :    store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
+// CHECK:        %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
+// CHECK-NEXT:   store i32 %[[LOAD]], ptr addrspace(3) @[[SHARED_ANY]], align 4
+// CHECK-NEXT:   store i32 %[[LOAD]], ptr @global_host, align 4
+// CHECK-NEXT:   store i32 %[[LOAD]], ptr addrspace(3) @[[SHARED_NOHOST]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index f4b494b9e99bf..30d766f986931 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3633,13 +3633,13 @@ llvm.mlir.global internal @nohost() : i32
 llvm.func @omp_groupprivate_device() attributes {
     omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
   %0 = llvm.mlir.constant(1 : i32) : i32
-  %2 = omp.groupprivate @any : !llvm.ptr, device_type(any)
+  %2 = omp.groupprivate @any device_type(any) : !llvm.ptr
   llvm.store %0, %2 : i32, !llvm.ptr
 
-  %4 = omp.groupprivate @host : !llvm.ptr, device_type(host)
+  %4 = omp.groupprivate @host device_type(host) : !llvm.ptr
   llvm.store %0, %4 : i32, !llvm.ptr
 
-  %6 = omp.groupprivate @nohost : !llvm.ptr, device_type(nohost)
+  %6 = omp.groupprivate @nohost device_type(nohost) : !llvm.ptr
   llvm.store %0, %6 : i32, !llvm.ptr
   llvm.return
 }
@@ -3648,12 +3648,12 @@ llvm.func @omp_groupprivate_device() attributes {
 // CHECK-DAG: @any = internal global i32 undef
 // CHECK-DAG: @host = internal global i32 undef
 // CHECK-DAG: @nohost = internal global i32 undef
-// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
-// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
-// CHECK-LABEL: define void @omp_groupprivate_device()
-// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
+// CHECK-DAG: @[[SHARED_ANY:any.*]] = internal addrspace(3) global i32 poison
+// CHECK-DAG: @[[SHARED_NOHOST:nohost.*]] = internal addrspace(3) global i32 poison
+// CHECK: define void @omp_groupprivate_device()
+// CHECK: store i32 1, ptr addrspace(3) @[[SHARED_ANY]], align 4
 // CHECK: store i32 1, ptr @host, align 4
-// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
+// CHECK: store i32 1, ptr addrspace(3) @[[SHARED_NOHOST]], align 4
 // CHECK: ret void
 
 // -----
@@ -3664,13 +3664,13 @@ llvm.mlir.global internal @host1() : i32
 llvm.mlir.global internal @nohost1() : i32
 llvm.func @omp_groupprivate_host() {
   %0 = llvm.mlir.constant(1 : i32) : i32
-  %2 = omp.groupprivate @any1 : !llvm.ptr, device_type(any)
+  %2 = omp.groupprivate @any1 device_type(any) : !llvm.ptr
   llvm.store %0, %2 : i32, !llvm.ptr
 
-  %4 = omp.groupprivate @host1 : !llvm.ptr, device_type(host)
+  %4 = omp.groupprivate @host1 device_type(host) : !llvm.ptr
   llvm.store %0, %4 : i32, !llvm.ptr
 
-  %6 = omp.groupprivate @nohost1 : !llvm.ptr, device_type(nohost)
+  %6 = omp.groupprivate @nohost1 device_type(nohost) : !llvm.ptr
   llvm.store %0, %6 : i32, !llvm.ptr
   llvm.return
 }

>From f174d7f154effa27218ad00f26b225d1e4068271 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 16 Apr 2026 10:21:21 +0530
Subject: [PATCH 10/10] emit warning for host groupprivate usage

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp      | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 092c65273ddb2..805060f932690 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -7915,8 +7915,9 @@ convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
         /*isExternallyInitialized=*/false);
     resultPtr = sharedVar;
   } else {
-    // Use original global address on host or when not allocating
-    // group-private storage.
+    if (shouldAllocate && !isTargetDevice)
+      opInst.emitWarning("groupprivate directive is currently ignored on the "
+                         "host, using original global");
     resultPtr = globalValue;
   }
 



More information about the Mlir-commits mailing list