[flang-commits] [flang] 6811a3b - [flang][cuda] Allocate extra descriptor in managed memory when it is coming from device (#140818)
via flang-commits
flang-commits at lists.llvm.org
Tue May 20 18:55:16 PDT 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-05-20T18:55:13-07:00
New Revision: 6811a3bedfd33ee64e884467791d2c299504b0e8
URL: https://github.com/llvm/llvm-project/commit/6811a3bedfd33ee64e884467791d2c299504b0e8
DIFF: https://github.com/llvm/llvm-project/commit/6811a3bedfd33ee64e884467791d2c299504b0e8.diff
LOG: [flang][cuda] Allocate extra descriptor in managed memory when it is coming from device (#140818)
Added:
Modified:
flang/lib/Optimizer/CodeGen/CodeGen.cpp
flang/test/Fir/CUDA/cuda-code-gen.mlir
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 70c90fae34086..205807eab403a 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -1830,7 +1830,9 @@ static bool isDeviceAllocation(mlir::Value val, mlir::Value adaptorVal) {
(callOp.getCallee().value().getRootReference().getValue().starts_with(
RTNAME_STRING(CUFMemAlloc)) ||
callOp.getCallee().value().getRootReference().getValue().starts_with(
- RTNAME_STRING(CUFAllocDescriptor))))
+ RTNAME_STRING(CUFAllocDescriptor)) ||
+ callOp.getCallee().value().getRootReference().getValue() ==
+ "__tgt_acc_get_deviceptr"))
return true;
return false;
}
@@ -3253,8 +3255,9 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
if (auto callOp = mlir::dyn_cast_or_null<mlir::LLVM::CallOp>(
inputBoxStorage.getDefiningOp())) {
if (callOp.getCallee() &&
- (*callOp.getCallee())
- .starts_with(RTNAME_STRING(CUFAllocDescriptor))) {
+ ((*callOp.getCallee())
+ .starts_with(RTNAME_STRING(CUFAllocDescriptor)) ||
+ (*callOp.getCallee()).starts_with("__tgt_acc_get_deviceptr"))) {
// CUDA Fortran local descriptor are allocated in managed memory. So
// new storage must be allocated the same way.
auto mod = load->getParentOfType<mlir::ModuleOp>();
diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir
index fdd9f1ac12b1f..672be13beae24 100644
--- a/flang/test/Fir/CUDA/cuda-code-gen.mlir
+++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir
@@ -204,3 +204,20 @@ func.func @_QMm1Psub1(%arg0: !fir.box<!fir.array<?xi32>> {cuf.data_attr = #cuf.c
fir.global common @_QPshared_static__shared_mem(dense<0> : vector<28xi8>) {alignment = 8 : i64, data_attr = #cuf.cuda<shared>} : !fir.array<28xi8>
// CHECK: llvm.mlir.global common @_QPshared_static__shared_mem(dense<0> : vector<28xi8>) {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<28 x i8>
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+ func.func @_QQmain() attributes {fir.bindc_name = "cufkernel_global"} {
+ %c0 = arith.constant 0 : index
+ %3 = fir.call @__tgt_acc_get_deviceptr() : () -> !fir.ref<!fir.box<none>>
+ %4 = fir.convert %3 : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ %5 = fir.load %4 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+ return
+ }
+
+ // CHECK-LABEL: llvm.func @_QQmain()
+ // CHECK: llvm.call @_FortranACUFAllocDescriptor
+
+ func.func private @__tgt_acc_get_deviceptr() -> !fir.ref<!fir.box<none>>
+}
More information about the flang-commits mailing list