[flang-commits] [flang] [llvm] [Flang][OpenMP] Add global address space to globals for target device (PR #119585)
via flang-commits
flang-commits at lists.llvm.org
Wed Dec 11 08:44:37 PST 2024
https://github.com/agozillon created https://github.com/llvm/llvm-project/pull/119585
Currently we do not add the appropriate global address space to globals on the target device pass, this PR attempts to add at least a preliminary number of these address spaces markings and then add the appropriate casts where neccesary (from global to program primarily).
This allows for more correct IR that the backends (in particular AMDGPU) can treat more aptly for optimisations and code correctness
1/3 required PRs to enable declare target to mapping, should look at PR 3/3 to check for full green passes (this one will fail a number due to some dependencies).
Co-authored-by: Raghu Maddhipatla <raghu.maddhipatla at amd.com>
>From a59c4fbdb3daeaf1f11a96a0d658c6aae4e5b7dc Mon Sep 17 00:00:00 2001
From: agozillon <Andrew.Gozillon at amd.com>
Date: Tue, 10 Dec 2024 11:46:32 -0600
Subject: [PATCH] [Flang][OpenMP] Add global address space to globals for
target device
Currently we do not add the appropriate global address space to globals on the target device
pass, this PR attempts to add at least a preliminary number of these address spaces markings
and then add the appropriate casts where neccesary (from global to program primarily).
This allows for more correct IR that the backends (in particular AMDGPU) can treat more
aptly for optimisations and code correctness.
---
.../flang/Optimizer/Builder/FIRBuilder.h | 4 +
.../flang/Optimizer/CodeGen/FIROpPatterns.h | 3 +
flang/lib/Optimizer/Builder/FIRBuilder.cpp | 14 ++++
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 73 +++++++++++++++----
flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp | 25 ++++++-
flang/test/Fir/convert-to-llvm.fir | 56 ++++++++++----
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 12 ++-
7 files changed, 156 insertions(+), 31 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index b772c523caba49..0b7cbe92f4d879 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -774,6 +774,10 @@ elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams);
/// Get the address space which should be used for allocas
uint64_t getAllocaAddressSpace(mlir::DataLayout *dataLayout);
+uint64_t getGlobalAddressSpace(mlir::DataLayout *dataLayout);
+
+uint64_t getProgramAddressSpace(mlir::DataLayout *dataLayout);
+
} // namespace fir::factory
#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
index 35749dae5d7e9f..1659ede937f017 100644
--- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
+++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
@@ -185,6 +185,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
unsigned
getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
+ unsigned
+ getGlobalAddressSpace(mlir::ConversionPatternRewriter &rewriter) const;
+
const fir::FIRToLLVMPassOptions &options;
using ConvertToLLVMPattern::match;
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 3a39c455015f9f..ff98d13ad86c24 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1721,3 +1721,17 @@ uint64_t fir::factory::getAllocaAddressSpace(mlir::DataLayout *dataLayout) {
return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return 0;
}
+
+uint64_t fir::factory::getGlobalAddressSpace(mlir::DataLayout *dataLayout) {
+ if (dataLayout)
+ if (mlir::Attribute addrSpace = dataLayout->getGlobalMemorySpace())
+ return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return 0;
+}
+
+uint64_t fir::factory::getProgramAddressSpace(mlir::DataLayout *dataLayout) {
+ if (dataLayout)
+ if (mlir::Attribute addrSpace = dataLayout->getProgramMemorySpace())
+ return mlir::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return 0;
+}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 5345d64c330f06..1b79b9d9975dde 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -132,6 +132,38 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter,
}
namespace {
+
+mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
+ mlir::Location loc,
+ std::uint64_t globalAS,
+ std::uint64_t programAS,
+ llvm::StringRef symName, mlir::Type type,
+ mlir::Operation *replaceOp = nullptr) {
+ if (mlir::isa<mlir::LLVM::LLVMPointerType>(type)) {
+ if (globalAS != programAS) {
+ auto llvmAddrOp = rewriter.create<mlir::LLVM::AddressOfOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ if (replaceOp)
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
+ replaceOp, ::getLlvmPtrType(rewriter.getContext(), programAS),
+ llvmAddrOp);
+ return rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), programAS), llvmAddrOp);
+ }
+
+ if (replaceOp)
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
+ replaceOp, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ return rewriter.create<mlir::LLVM::AddressOfOp>(
+ loc, getLlvmPtrType(rewriter.getContext(), globalAS), symName);
+ }
+
+ if (replaceOp)
+ return rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(replaceOp, type,
+ symName);
+ return rewriter.create<mlir::LLVM::AddressOfOp>(loc, type, symName);
+}
+
/// Lower `fir.address_of` operation to `llvm.address_of` operation.
struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
using FIROpConversion::FIROpConversion;
@@ -139,9 +171,15 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
llvm::LogicalResult
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
- auto ty = convertType(addr.getType());
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- addr, ty, addr.getSymbol().getRootReference().getValue());
+ auto global = addr->getParentOfType<mlir::ModuleOp>()
+ .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+ replaceWithAddrOfOrASCast(
+ rewriter, addr->getLoc(),
+ global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global ? global.getSymName()
+ : addr.getSymbol().getRootReference().getValue(),
+ convertType(addr.getType()), addr);
return mlir::success();
}
};
@@ -1255,14 +1293,19 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
? fir::NameUniquer::getTypeDescriptorAssemblyName(recType.getName())
: fir::NameUniquer::getTypeDescriptorName(recType.getName());
mlir::Type llvmPtrTy = ::getLlvmPtrType(mod.getContext());
+ mlir::DataLayout dataLayout(mod);
if (auto global = mod.template lookupSymbol<fir::GlobalOp>(name)) {
- return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
- global.getSymName());
+ return replaceWithAddrOfOrASCast(
+ rewriter, loc, fir::factory::getGlobalAddressSpace(&dataLayout),
+ fir::factory::getProgramAddressSpace(&dataLayout),
+ global.getSymName(), llvmPtrTy);
}
if (auto global = mod.template lookupSymbol<mlir::LLVM::GlobalOp>(name)) {
// The global may have already been translated to LLVM.
- return rewriter.create<mlir::LLVM::AddressOfOp>(loc, llvmPtrTy,
- global.getSymName());
+ return replaceWithAddrOfOrASCast(
+ rewriter, loc, global.getAddrSpace(),
+ fir::factory::getProgramAddressSpace(&dataLayout),
+ global.getSymName(), llvmPtrTy);
}
// Type info derived types do not have type descriptors since they are the
// types defining type descriptors.
@@ -2759,12 +2802,16 @@ struct TypeDescOpConversion : public fir::FIROpConversion<fir::TypeDescOp> {
: fir::NameUniquer::getTypeDescriptorName(recordType.getName());
auto llvmPtrTy = ::getLlvmPtrType(typeDescOp.getContext());
if (auto global = module.lookupSymbol<mlir::LLVM::GlobalOp>(typeDescName)) {
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- typeDescOp, llvmPtrTy, global.getSymName());
+ replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
+ global.getAddrSpace(),
+ getProgramAddressSpace(rewriter),
+ global.getSymName(), llvmPtrTy, typeDescOp);
return mlir::success();
} else if (auto global = module.lookupSymbol<fir::GlobalOp>(typeDescName)) {
- rewriter.replaceOpWithNewOp<mlir::LLVM::AddressOfOp>(
- typeDescOp, llvmPtrTy, global.getSymName());
+ replaceWithAddrOfOrASCast(rewriter, typeDescOp->getLoc(),
+ getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global.getSymName(), llvmPtrTy, typeDescOp);
return mlir::success();
}
return mlir::failure();
@@ -2855,8 +2902,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
mlir::SymbolRefAttr comdat;
llvm::ArrayRef<mlir::NamedAttribute> attrs;
auto g = rewriter.create<mlir::LLVM::GlobalOp>(
- loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0, 0,
- false, false, comdat, attrs, dbgExprs);
+ loc, tyAttr, isConst, linkage, global.getSymName(), initAttr, 0,
+ getGlobalAddressSpace(rewriter), false, false, comdat, attrs, dbgExprs);
if (global.getAlignment() && *global.getAlignment() > 0)
g.setAlignment(*global.getAlignment());
diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
index 12021deb4bd97a..a352e00649c23c 100644
--- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
+++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
@@ -346,7 +346,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getAllocaMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
@@ -358,11 +361,29 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace(
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
assert(parentOp != nullptr &&
"expected insertion block to have parent operation");
- if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
if (mlir::Attribute addrSpace =
mlir::DataLayout(module).getProgramMemorySpace())
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
return defaultAddressSpace;
}
+unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace(
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
+ assert(parentOp != nullptr &&
+ "expected insertion block to have parent operation");
+ auto module = mlir::isa<mlir::ModuleOp>(parentOp)
+ ? mlir::cast<mlir::ModuleOp>(parentOp)
+ : parentOp->getParentOfType<mlir::ModuleOp>();
+ if (module)
+ if (mlir::Attribute addrSpace =
+ mlir::DataLayout(module).getGlobalMemorySpace())
+ return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
+ return defaultAddressSpace;
+}
+
} // namespace fir
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index 4c9f965e1241a0..a5d741d7f566ad 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -3,8 +3,8 @@
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
//===================================================
// SUMMARY: Tests for FIR --> LLVM MLIR conversion
@@ -17,7 +17,10 @@ fir.global @g_i0 : i32 {
fir.has_value %1 : i32
}
-// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 {
+// CHECK: llvm.mlir.global external @g_i0()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i32 {
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.return %[[C0]] : i32
// CHECK: }
@@ -29,7 +32,10 @@ fir.global @g_ci5 constant : i32 {
fir.has_value %c : i32
}
-// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 {
+// CHECK: llvm.mlir.global external constant @g_ci5()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i32 {
// CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32
// CHECK: llvm.return %[[C5]] : i32
// CHECK: }
@@ -37,17 +43,26 @@ fir.global @g_ci5 constant : i32 {
// -----
fir.global internal @i_i515 (515:i32) : i32
-// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global internal @i_i515(515 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
fir.global common @C_i511 (0:i32) : i32
-// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global common @C_i511(0 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
fir.global weak @w_i86 (86:i32) : i32
-// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32
+// CHECK: llvm.mlir.global weak @w_i86(86 : i32)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : i32
// -----
@@ -69,9 +84,13 @@ fir.global @symbol : i64 {
fir.has_value %0 : i64
}
-// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
+// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr
+// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
-// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 {
+// CHECK: llvm.mlir.global external @[[SYMBOL]]()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: i64 {
// CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64
// CHECK: llvm.return %{{.*}} : i64
// CHECK: }
@@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> {
fir.has_value %2 : !fir.array<32x32xi32>
}
-// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> {
+// CHECK: llvm.mlir.global internal @_QEmultiarray()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>>
// CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>>
// CHECK: }
@@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> {
fir.has_value %2 : !fir.array<32xi32>
}
-// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> {
+// CHECK: llvm.mlir.global internal @_QEmultiarray()
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: : !llvm.array<32 x i32> {
// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32>
// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32>
@@ -1787,7 +1812,9 @@ func.func @embox1(%arg0: !fir.ref<!fir.type<_QMtest_dinitTtseq{i:i32}>>) {
// CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>
// CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
-// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
+// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr
+// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1>
+// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr
// CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>
// -----
@@ -2775,7 +2802,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr<!fir.array<? x i32>>
fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8>
-// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8>
+// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>)
+// GENERIC-SAME: {addr_space = 0 : i32}
+// AMDGPU-SAME: {addr_space = 1 : i32}
+// CHECK-SAME: !llvm.array<4294967296 x i8>
// -----
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 21004e6a15d495..ba23635160e062 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6741,6 +6741,12 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() {
return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
}
+static Value *removeASCastIfPresent(Value *V) {
+ if (Operator::getOpcode(V) == Instruction::AddrSpaceCast)
+ return cast<Operator>(V)->getOperand(0);
+ return V;
+}
+
static Expected<Function *> createOutlinedFunction(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName,
SmallVectorImpl<Value *> &Inputs,
@@ -6902,9 +6908,9 @@ static Expected<Function *> createOutlinedFunction(
// preceding mapped arguments that refer to the same global that may be
// seperate segments. To prevent this, we defer global processing until all
// other processing has been performed.
- if (llvm::isa<llvm::GlobalValue>(std::get<0>(InArg)) ||
- llvm::isa<llvm::GlobalObject>(std::get<0>(InArg)) ||
- llvm::isa<llvm::GlobalVariable>(std::get<0>(InArg))) {
+ if (llvm::isa<llvm::GlobalValue>(removeASCastIfPresent(Input)) ||
+ llvm::isa<llvm::GlobalObject>(removeASCastIfPresent(Input)) ||
+ llvm::isa<llvm::GlobalVariable>(removeASCastIfPresent(Input))) {
DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
continue;
}
More information about the flang-commits
mailing list