[flang-commits] [flang] [llvm] [Flang][OpenMP] Add global address space to globals for target device (PR #119585)
via flang-commits
flang-commits at lists.llvm.org
Wed Jan 15 17:15:22 PST 2025
https://github.com/agozillon updated https://github.com/llvm/llvm-project/pull/119585
From c7263e78dc3cdefefef52369aeed96864f04c0bf Mon Sep 17 00:00:00 2001
From: agozillon <Andrew.Gozillon at amd.com>
Date: Tue, 10 Dec 2024 11:46:32 -0600
Subject: [PATCH] [Flang][OpenMP] Add global address space to globals for
target device
Currently we do not add the appropriate global address space to globals in the
target device pass. This PR adds at least a preliminary set of these address
space markings and then inserts the appropriate casts where necessary
(primarily from the global to the program address space). This allows for more
correct IR that the backends (in particular AMDGPU) can treat more
aptly for optimisations and code correctness.
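As a rough before/after illustration (a hand-written sketch based on the
updated convert-to-llvm.fir checks below, not verbatim compiler output), a
global lowered for amdgcn-amd-amdhsa now carries the global address space,
with a cast back to the program address space at the point of use:

  fir.global @symbol : i64

  // becomes:
  llvm.mlir.global external @symbol() {addr_space = 1 : i32} : i64
  %0 = llvm.mlir.addressof @symbol : !llvm.ptr<1>
  %1 = llvm.addrspacecast %0 : !llvm.ptr<1> to !llvm.ptr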
Co-authored-by: Raghu Maddhipatla <raghu.maddhipatla at amd.com>
---
.../flang/Optimizer/Builder/FIRBuilder.h | 4 +
.../flang/Optimizer/CodeGen/FIROpPatterns.h | 3 +
flang/lib/Optimizer/Builder/FIRBuilder.cpp | 14 +++
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 99 ++++++++++++++++---
flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp | 25 ++++-
flang/test/Fir/convert-to-llvm.fir | 56 ++++++++---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 12 ++-
7 files changed, 182 insertions(+), 31 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index c5d86e713f253a..eb32aeee9fe482 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -804,6 +804,10 @@ elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams);
/// Get the address space which should be used for allocas
uint64_t getAllocaAddressSpace(mlir::DataLayout *dataLayout);
+uint64_t getGlobalAddressSpace(mlir::DataLayout *dataLayout);
+
+uint64_t getProgramAddressSpace(mlir::DataLayout *dataLayout);
+
} // namespace fir::factory
#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
index 35749dae5d7e9f..1659ede937f017 100644
--- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
+++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
@@ -185,6 +185,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
unsigned
getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
+ unsigned
+ getGlobalAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
+
const fir::FIRToLLVMPassOptions &options;
using ConvertToLLVMPattern::match;
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index d01becfe800937..558f62616ae7ed 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1740,3 +1740,17 @@ uint64_t fir::factory::getAllocaAddressSpace(mlir::DataLayout *dataLayout) {
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return 0;
}
+
+uint64_t fir::factory::getGlobalAddressSpace(mlir::DataLayout *dataLayout) {
+ if (dataLayout)
+ if (mlir::Attribute addrSpace = dataLayout->getGlobalMemorySpace())
+ return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return 0;
+}
+
+uint64_t fir::factory::getProgramAddressSpace(mlir::DataLayout *dataLayout) {
+ if (dataLayout)
+ if (mlir::Attribute addrSpace = dataLayout->getProgramMemorySpace())
+ return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return 0;
+}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 5ba93fefab3f9e..29de325817ab20 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -134,6 +134,49 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
}
namespace {
+
+// Creates an AddressOfOp, or an AddressOfOp followed by an AddrSpaceCastOp
+// when the global and program address spaces of the type differ.
+mlir::Value createAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
+ mlir::Location loc, std::uint64_t globalAS,
+ std::uint64_t programAS,
+ llvm::StringRef symName, mlir::Type type) {
+ if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
+ if (globalAS != programAS) {
+ auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
+ }
+ return rewriter.create<mlir::LLVM::AddressOfOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ }
+ return rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName);
+}
+
+// Replaces an existing operation with an AddressOfOp, or with an AddressOfOp
+// followed by an AddrSpaceCastOp when the global and program address spaces
+// of the type differ.
+mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
+ mlir::Location loc,
+ std::uint64_t globalAS,
+ std::uint64_t programAS,
+ llvm::StringRef symName, mlir::Type type,
+ mlir::Operation *replaceOp) {
+ if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
+ if (globalAS != programAS) {
+ auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
+ replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS),
+ llvmAddrOp);
+ }
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
+ replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ }
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
+ symName);
+}
+
/// Lower `fir.address_of` operation to `llvm.address_of` operation.
struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
using FIROpConversion::FIROpConversion;
@@ -141,9 +184,15 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
llvm::LogicalResult
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
- auto ty = convertType(addr.getType());
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- addr, ty, addr.getSymbol().getRootReference().getValue());
+ auto global = addr->getParentOfType<mlir::ModuleOp>()
+ .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+ replaceWithAddrOfOrASCast(
+ rewriter, addr->getLoc(),
+ global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global ? global.getSymName()
+ : addr.getSymbol().getRootReference().getValue(),
+ convertType(addr.getType()), addr);
return mlir::success();
}
};
@@ -1350,14 +1399,34 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName())
: fir::NameUniquer::getTypeDescriptorName(recType.getName());
mlir::Type llvmPtrTy = ::getLlvmPtrType(mod.getContext());
+
+    // We allow the module to fall back to a default data layout if it is a
+    // regular module. However, we prevent this for GPU modules, as their data
+    // layout is currently the union of the GPU module's layout and the parent
+    // builtin module's layout (with the GPU module overriding the parent where
+    // there are duplicates). Forcing the default layout onto a GPU module that
+    // has no data layout causes problems, as the infrastructure does not
+    // currently support (and does not appear to have been intended to support)
+    // the union of two layouts that both contain builtin data layout entries.
+    std::optional<mlir::DataLayout> dataLayout =
+        fir::support::getOrSetMLIRDataLayout(
+            mod, /*allowDefaultLayout=*/!mlir::isa<mlir::gpu::GPUModuleOp>(mod));
+ assert(dataLayout.has_value() && "Module missing DataLayout information");
+
if (auto global = mod.template lookupSymbol<fir::GlobalOp>(name)) {
- return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
- global.getSymName());
+ return createAddrOfOrASCast(
+ rewriter, loc, fir::factory::getGlobalAddressSpace(&*dataLayout),
+ fir::factory::getProgramAddressSpace(&*dataLayout),
+ global.getSymName(), llvmPtrTy);
}
if (auto global = mod.template lookupSymbol<mlir::LLVM::GlobalOp>(name)) {
// The global may have already been translated to LLVM.
- return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
- global.getSymName());
+ return createAddrOfOrASCast(
+ rewriter, loc, global.getAddrSpace(),
+ fir::factory::getProgramAddressSpace(&*dataLayout),
+ global.getSymName(), llvmPtrTy);
}
// Type info derived types do not have type descriptors since they are the
// types defining type descriptors.
@@ -2896,12 +2965,16 @@ struct TypeDescOpConversion : public fir::FIROpConversion<fir::TypeDescOp> {
: fir::NameUniquer::getTypeDescriptorName(recordType.getName());
auto llvmPtrTy = ::getLlvmPtrType(typeDescOp.getContext());
if (auto global = module.lookupSymbol<mlir::LLVM::GlobalOp>(typeDescName)) {
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- typeDescOp, llvmPtrTy, global.getSymName());
+ replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
+ global.getAddrSpace(),
+ getProgramAddressSpace(rewriter),
+ global.getSymName(), llvmPtrTy, typeDescOp);
return mlir::success();
} else if (auto global = module.lookupSymbol<fir::GlobalOp>(typeDescName)) {
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- typeDescOp, llvmPtrTy, global.getSymName());
+ replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
+ getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global.getSymName(), llvmPtrTy, typeDescOp);
return mlir::success();
}
return mlir::failure();
@@ -2992,8 +3065,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
mlir::SymbolRefAttr comdat;
llvm::ArrayRef<mlir::NamedAttribute> attrs;
auto g = rewriter.create<mlir::LLVM::GlobalOp>(
- loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0, 0,
- false, false, comdat, attrs, dbgExprs);
+ loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0,
+ getGlobalAddressSpace(rewriter), false, false, comdat, attrs, dbgExprs);
if (global.getAlignment() && *global.getAlignment() > 0)
g.setAlignment(*global.getAlignment());
diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
index 12021deb4bd97a..a352e00649c23c 100644
--- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
+++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
@@ -346,7 +346,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getAllocaMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -358,11 +361,29 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getProgramMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return defaultAddressSpace;
}
+unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace(
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
+ assert(parentOp != nullptr &&
+ "expected insertion block to have parent operation");
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
+ if (mlir::Attribute addrSpace =
+ mlir::DataLayout(module).getGlobalMemorySpace())
+ return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return defaultAddressSpace;
+}
+
} // namespace fir
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index 4c9f965e1241a0..a5d741d7f566ad 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -3,8 +3,8 @@
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
//===================================================
// SUMMARY: Tests for FIR --> LLVM MLIR conversion
@@ -17,7 +17,10 @@ fir.global @g_i0 : i32 {
fir.has_value %1 : i32
}
-// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 {
+// CHECK: llvm.mlir.global external @g_i0()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i32 {
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.return %[[C0]] : i32
// CHECK: }
@@ -29,7 +32,10 @@ fir.global @g_ci5 constant : i32 {
fir.has_value %c : i32
}
-// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 {
+// CHECK: llvm.mlir.global external constant @g_ci5()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i32 {
// CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32
// CHECK: llvm.return %[[C5]] : i32
// CHECK: }
@@ -37,17 +43,26 @@ fir.global @g_ci5 constant : i32 {
// -----
fir.global internal @i_i515 (515:i32) : i32
-// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global internal @i_i515(515 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
fir.global common @C_i511 (0:i32) : i32
-// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global common @C_i511(0 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
fir.global weak @w_i86 (86:i32) : i32
-// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global weak @w_i86(86 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
@@ -69,9 +84,13 @@ fir.global @symbol : i64 {
fir.has_value %0 : i64
}
-// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
+// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
+// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
-// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 {
+// CHECK: llvm.mlir.global external @[[SYMBOL]]()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i64 {
// CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64
// CHECK: llvm.return %{{.*}} : i64
// CHECK: }
@@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> {
fir.has_value %2 : !fir.array<32x32xi32>
}
-// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> {
+// CHECK: llvm.mlir.global internal @_QEmultiarray()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>>
// CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>>
// CHECK: }
@@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> {
fir.has_value %2 : !fir.array<32xi32>
}
-// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> {
+// CHECK: llvm.mlir.global internal @_QEmultiarray()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : !llvm.array<32 x i32> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32>
// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32>
@@ -1787,7 +1812,9 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
// CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
// CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
-// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
+// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
+// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1>
+// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
// CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
// -----
@@ -2775,7 +2802,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr<!fir.array<? x i32>>
fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8>
-// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8>
+// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: !llvm.array<4294967296 x i8>
// -----
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3d461f0ad4228c..ae31598ea2c7b8 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6743,6 +6743,12 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() {
return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
}
+static Value *removeASCastIfPresent(Value *V) {
+ if (Operator::getOpcode(V) == Instruction::AddrSpaceCast)
+ return cast<Operator>(V)->getOperand(0);
+ return V;
+}
+
static Expected<Function *> createOutlinedFunction(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
@@ -6926,9 +6932,9 @@ static Expected<Function *> createOutlinedFunction(
// preceding mapped arguments that refer to the same global that may be
// separate segments. To prevent this, we defer global processing until all
// other processing has been performed.
- if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
- llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
- llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
+ if (llvm::isa<llvm::GlobalValue>(removeASCastIfPresent(Input)) ||
+ llvm::isa<llvm::GlobalObject>(removeASCastIfPresent(Input)) ||
+ llvm::isa<llvm::GlobalVariable>(removeASCastIfPresent(Input))) {
DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
continue;
}
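For context on the OMPIRBuilder change above: with globals now placed in the
global address space, a mapped global can reach createOutlinedFunction wrapped
in a constant address space cast rather than as a bare GlobalValue. A minimal
sketch of the kind of input this handles (assumed shape with hypothetical
names, not taken from this PR's tests):

  @g = addrspace(1) global i32 0
  ; the kernel input argument arrives as the constant expression
  ;   addrspacecast (ptr addrspace(1) @g to ptr)
  ; so isa<GlobalValue> only matches once removeASCastIfPresent peels the
  ; cast off, keeping the global on the deferred-replacement path.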