[flang] [llvm] [Flang][OpenMP] Add global address space to globals for target device (PR #119585)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 09:51:18 PDT 2025
https://github.com/agozillon updated https://github.com/llvm/llvm-project/pull/119585
>From 1d59ea567e0b2baf200e9e225ee6c71e8f01acca Mon Sep 17 00:00:00 2001
From: agozillon <Andrew.Gozillon at amd.com>
Date: Fri, 1 Aug 2025 15:55:38 -0500
Subject: [PATCH] [Flang][OpenMP] Additional global address space modifications
for device
A prior PR added a portion of the global address space modifications required for declare target to, this PR seeks to add a small amount more leftover from that PR.
The intent is to allow for more correct IR that the backends (in particular AMDGPU) can treat more aptly for optimisations and code correctness
1/3 required PRs to enable declare target to mapping, should look at PR 3/3 to check for full green passes (this one will fail a number due to some dependencies).
Co-authored-by: Raghu Maddhipatla raghu.maddhipatla at amd.com
---
.../flang/Optimizer/Builder/FIRBuilder.h | 4 ++
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 2 +
flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp | 20 +++++--
flang/test/Fir/convert-to-llvm.fir | 56 ++++++++++++++-----
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 10 +++-
5 files changed, 74 insertions(+), 18 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index d8b6a9f87ab19..8ab1b388b49c1 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -955,6 +955,10 @@ mlir::Value genLifetimeStart(mlir::OpBuilder &builder, mlir::Location loc,
void genLifetimeEnd(mlir::OpBuilder &builder, mlir::Location loc,
mlir::Value mem, int64_t size);
+uint64_t getGlobalAddressSpace(mlir::DataLayout *dataLayout);
+
+uint64_t getProgramAddressSpace(mlir::DataLayout *dataLayout);
+
} // namespace fir::factory
#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 1b289ae690cbe..9d8f558a6292a 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -140,6 +140,8 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
namespace {
+// Replaces an existing operation with an AddressOfOp or an AddrSpaceCastOp
+// depending on the existing address spaces of the type.
mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
mlir::Location loc,
std::uint64_t globalAS,
diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
index c52be5601bbfe..96e3caa481f51 100644
--- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
+++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
@@ -349,7 +349,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getAllocaMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -361,7 +364,10 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getProgramMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -373,8 +379,14 @@ unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- auto dataLayout = mlir::DataLayout::closest(parentOp);
- return fir::factory::getGlobalAddressSpace(&dataLayout);
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
+ if (mlir::Attribute addrSpace =
+ mlir::DataLayout(module).getGlobalMemorySpace())
+ return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return defaultAddressSpace;
}
} // namespace fir
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index 50a98466f0d4b..28abec9d38e08 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -3,8 +3,8 @@
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
//===================================================
// SUMMARY: Tests for FIR --> LLVM MLIR conversion
@@ -17,7 +17,10 @@ fir.global @g_i0 : i32 {
fir.has_value %1 : i32
}
-// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 {
+// CHECK: llvm.mlir.global external @g_i0()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i32 {
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.return %[[C0]] : i32
// CHECK: }
@@ -29,7 +32,10 @@ fir.global @g_ci5 constant : i32 {
fir.has_value %c : i32
}
-// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 {
+// CHECK: llvm.mlir.global external constant @g_ci5()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i32 {
// CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32
// CHECK: llvm.return %[[C5]] : i32
// CHECK: }
@@ -37,17 +43,26 @@ fir.global @g_ci5 constant : i32 {
// -----
fir.global internal @i_i515 (515:i32) : i32
-// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global internal @i_i515(515 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
fir.global common @C_i511 (0:i32) : i32
-// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global common @C_i511(0 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
fir.global weak @w_i86 (86:i32) : i32
-// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global weak @w_i86(86 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
@@ -69,9 +84,13 @@ fir.global @symbol : i64 {
fir.has_value %0 : i64
}
-// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
+// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
+// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
-// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 {
+// CHECK: llvm.mlir.global external @[[SYMBOL]]()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i64 {
// CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64
// CHECK: llvm.return %{{.*}} : i64
// CHECK: }
@@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> {
fir.has_value %2 : !fir.array<32x32xi32>
}
-// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> {
+// CHECK: llvm.mlir.global internal @_QEmultiarray()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>>
// CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>>
// CHECK: }
@@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> {
fir.has_value %2 : !fir.array<32xi32>
}
-// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> {
+// CHECK: llvm.mlir.global internal @_QEmultiarray()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : !llvm.array<32 x i32> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32>
// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32>
@@ -1801,7 +1826,9 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
// CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
// CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
-// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
+// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
+// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1>
+// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
// CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
// -----
@@ -2824,7 +2851,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr<!fir.array<? x i32>>
fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8>
-// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8>
+// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: !llvm.array<4294967296 x i8>
// -----
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ea027e48fa2f1..e8d57da698b93 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7379,6 +7379,12 @@ static void FixupDebugInfoForOutlinedFunction(
}
}
+static Value *removeASCastIfPresent(Value *V) {
+ if (Operator::getOpcode(V) == Instruction::AddrSpaceCast)
+ return cast<Operator>(V)->getOperand(0);
+ return V;
+}
+
static Expected<Function *> createOutlinedFunction(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
@@ -7542,7 +7548,9 @@ static Expected<Function *> createOutlinedFunction(
// preceding mapped arguments that refer to the same global that may be
// seperate segments. To prevent this, we defer global processing until all
// other processing has been performed.
- if (isa<GlobalValue>(Input)) {
+ if (llvm::isa<llvm::GlobalValue>(removeASCastIfPresent(Input)) ||
+ llvm::isa<llvm::GlobalObject>(removeASCastIfPresent(Input)) ||
+ llvm::isa<llvm::GlobalVariable>(removeASCastIfPresent(Input))) {
DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
continue;
}
More information about the llvm-commits
mailing list