[flang-commits] [flang] f1b075d - [flang][cuda] Pass the pinned variable in allocate calls (#125310)
via flang-commits
flang-commits at lists.llvm.org
Sun Feb 2 18:06:03 PST 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-02-02T18:05:59-08:00
New Revision: f1b075df2e8510b6ebed405a857f6941913ddf9c
URL: https://github.com/llvm/llvm-project/commit/f1b075df2e8510b6ebed405a857f6941913ddf9c
DIFF: https://github.com/llvm/llvm-project/commit/f1b075df2e8510b6ebed405a857f6941913ddf9c.diff
LOG: [flang][cuda] Pass the pinned variable in allocate calls (#125310)
Added:
Modified:
flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
flang/include/flang/Runtime/CUDA/allocatable.h
flang/include/flang/Runtime/CUDA/pointer.h
flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
flang/runtime/CUDA/allocatable.cpp
flang/runtime/CUDA/pointer.cpp
flang/test/Fir/CUDA/cuda-allocate.fir
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
index eaa1de76154d93..116b927a86a7a9 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
@@ -401,6 +401,13 @@ constexpr TypeBuilderFunc getModel<bool &>() {
};
}
template <>
+constexpr TypeBuilderFunc getModel<bool *>() {
+ return [](mlir::MLIRContext *context) -> mlir::Type {
+ TypeBuilderFunc f{getModel<bool>()};
+ return fir::ReferenceType::get(f(context));
+ };
+}
+template <>
constexpr TypeBuilderFunc getModel<unsigned short>() {
return [](mlir::MLIRContext *context) -> mlir::Type {
return mlir::IntegerType::get(
diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h
index 0a96f73b6be44b..822f2d4a2b297d 100644
--- a/flang/include/flang/Runtime/CUDA/allocatable.h
+++ b/flang/include/flang/Runtime/CUDA/allocatable.h
@@ -18,28 +18,30 @@ extern "C" {
/// Perform allocation of the descriptor.
int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t stream = -1,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+ bool *pinned = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary.
int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+ bool *pinned = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
/// Perform allocation of the descriptor without synchronization. Assign data
/// from source.
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
- const Descriptor &source, int64_t stream = -1, bool hasStat = false,
- const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
- int sourceLine = 0);
+ const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
+ bool hasStat = false, const Descriptor *errMsg = nullptr,
+ const char *sourceFile = nullptr, int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary. Assign data from source.
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
- const Descriptor &source, int64_t stream = -1, bool hasStat = false,
- const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
- int sourceLine = 0);
+ const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
+ bool hasStat = false, const Descriptor *errMsg = nullptr,
+ const char *sourceFile = nullptr, int sourceLine = 0);
/// Perform deallocation of the descriptor with synchronization of it when
/// necessary.
diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h
index 78c7a1a92b7ea9..7fbd8f8e061f20 100644
--- a/flang/include/flang/Runtime/CUDA/pointer.h
+++ b/flang/include/flang/Runtime/CUDA/pointer.h
@@ -18,28 +18,30 @@ extern "C" {
/// Perform allocation of the descriptor.
int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+ bool *pinned = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary.
int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t stream = -1,
- bool hasStat = false, const Descriptor *errMsg = nullptr,
- const char *sourceFile = nullptr, int sourceLine = 0);
+ bool *pinned = nullptr, bool hasStat = false,
+ const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+ int sourceLine = 0);
/// Perform allocation of the descriptor without synchronization. Assign data
/// from source.
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
- const Descriptor &source, int64_t stream = -1, bool hasStat = false,
- const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
- int sourceLine = 0);
+ const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
+ bool hasStat = false, const Descriptor *errMsg = nullptr,
+ const char *sourceFile = nullptr, int sourceLine = 0);
/// Perform allocation of the descriptor with synchronization of it when
/// necessary. Assign data from source.
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
- const Descriptor &source, int64_t stream = -1, bool hasStat = false,
- const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
- int sourceLine = 0);
+ const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr,
+ bool hasStat = false, const Descriptor *errMsg = nullptr,
+ const char *sourceFile = nullptr, int sourceLine = 0);
} // extern "C"
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 77aa11f0603f69..549498f5585a68 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -103,7 +103,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
mlir::Value sourceLine;
if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>)
sourceLine = fir::factory::locationToLineNo(
- builder, loc, op.getSource() ? fTy.getInput(6) : fTy.getInput(5));
+ builder, loc, op.getSource() ? fTy.getInput(7) : fTy.getInput(6));
else
sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));
@@ -119,22 +119,28 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
}
llvm::SmallVector<mlir::Value> args;
if constexpr (std::is_same_v<OpTy, cuf::AllocateOp>) {
+ mlir::Value pinned =
+ op.getPinned()
+ ? op.getPinned()
+ : builder.createNullConstant(
+ loc, fir::ReferenceType::get(
+ mlir::IntegerType::get(op.getContext(), 1)));
if (op.getSource()) {
mlir::Value stream =
op.getStream()
? op.getStream()
: builder.createIntegerConstant(loc, fTy.getInput(2), -1);
- args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
- op.getSource(), stream, hasStat,
- errmsg, sourceFile, sourceLine);
+ args = fir::runtime::createArguments(
+ builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned,
+ hasStat, errmsg, sourceFile, sourceLine);
} else {
mlir::Value stream =
op.getStream()
? op.getStream()
: builder.createIntegerConstant(loc, fTy.getInput(1), -1);
args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(),
- stream, hasStat, errmsg, sourceFile,
- sourceLine);
+ stream, pinned, hasStat, errmsg,
+ sourceFile, sourceLine);
}
} else {
args =
@@ -153,11 +159,6 @@ struct CUFAllocateOpConversion
mlir::LogicalResult
matchAndRewrite(cuf::AllocateOp op,
mlir::PatternRewriter &rewriter) const override {
- // TODO: Pinned is a reference to a logical value that can be set to true
- // when pinned allocation succeed. This will require a new entry point.
- if (op.getPinned())
- return mlir::failure();
-
auto mod = op->getParentOfType<mlir::ModuleOp>();
fir::FirOpBuilder builder(rewriter, mod);
mlir::Location loc = op.getLoc();
diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp
index 9be54e8906903d..6df3b06793b3e6 100644
--- a/flang/runtime/CUDA/allocatable.cpp
+++ b/flang/runtime/CUDA/allocatable.cpp
@@ -23,10 +23,10 @@ extern "C" {
RT_EXT_API_GROUP_BEGIN
int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
- bool hasStat, const Descriptor *errMsg, const char *sourceFile,
- int sourceLine) {
+ bool *pinned, bool hasStat, const Descriptor *errMsg,
+ const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
- desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
+ desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
#ifndef RT_DEVICE_COMPILATION
// Descriptor synchronization is only done when the allocation is done
// from the host.
@@ -41,8 +41,8 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
}
int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
- bool hasStat, const Descriptor *errMsg, const char *sourceFile,
- int sourceLine) {
+ bool *pinned, bool hasStat, const Descriptor *errMsg,
+ const char *sourceFile, int sourceLine) {
if (desc.HasAddendum()) {
Terminator terminator{sourceFile, sourceLine};
// TODO: This require a bit more work to set the correct type descriptor
@@ -53,14 +53,19 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
// Perform the standard allocation.
int stat{RTNAME(AllocatableAllocate)(
desc, hasStat, errMsg, sourceFile, sourceLine)};
+ if (pinned) {
+ // Set pinned according to stat. More infrastructre is needed to set it
+ // closer to the actual allocation call.
+ *pinned = (stat == StatOk);
+ }
return stat;
}
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
- const Descriptor &source, int64_t stream, bool hasStat,
+ const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
- alloc, stream, hasStat, errMsg, sourceFile, sourceLine)};
+ alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
@@ -70,10 +75,10 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
}
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
- const Descriptor &source, int64_t stream, bool hasStat,
+ const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocateSync)(
- alloc, stream, hasStat, errMsg, sourceFile, sourceLine)};
+ alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp
index 3252410bd8d2c2..d3ebe97b4e4acc 100644
--- a/flang/runtime/CUDA/pointer.cpp
+++ b/flang/runtime/CUDA/pointer.cpp
@@ -21,8 +21,9 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
-int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
- const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
+ bool hasStat, const Descriptor *errMsg, const char *sourceFile,
+ int sourceLine) {
if (desc.HasAddendum()) {
Terminator terminator{sourceFile, sourceLine};
// TODO: This require a bit more work to set the correct type descriptor
@@ -33,14 +34,19 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat,
// Perform the standard allocation.
int stat{
RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)};
+ if (pinned) {
+ // Set pinned according to stat. More infrastructre is needed to set it
+ // closer to the actual allocation call.
+ *pinned = (stat == StatOk);
+ }
return stat;
}
int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
- bool hasStat, const Descriptor *errMsg, const char *sourceFile,
- int sourceLine) {
+ bool *pinned, bool hasStat, const Descriptor *errMsg,
+ const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocate)(
- desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
+ desc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
#ifndef RT_DEVICE_COMPILATION
// Descriptor synchronization is only done when the allocation is done
// from the host.
@@ -55,10 +61,10 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
}
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
- const Descriptor &source, int64_t stream, bool hasStat,
+ const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocate)(
- pointer, stream, hasStat, errMsg, sourceFile, sourceLine)};
+ pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
@@ -68,10 +74,10 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
}
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
- const Descriptor &source, int64_t stream, bool hasStat,
+ const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocateSync)(
- pointer, stream, hasStat, errMsg, sourceFile, sourceLine)};
+ pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
Fortran::runtime::DoFromSourceAssign(
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index 08573110821cc2..095ad92d5deb50 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -19,7 +19,7 @@ func.func @_QPsub1() {
// CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
// CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
@@ -47,7 +47,7 @@ func.func @_QPsub3() {
// CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ADDR]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
@@ -87,7 +87,7 @@ func.func @_QPsub5() {
}
// CHECK-LABEL: func.func @_QPsub5()
-// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
@@ -118,7 +118,7 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} {
// CHECK: %[[B:.*]]:2 = hlfir.declare %[[B_ADDR]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMdataEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
// CHECK: _FortranAAllocatableSetBounds
// CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
func.func @_QPallocate_source() {
@@ -142,7 +142,7 @@ func.func @_QPallocate_source() {
// CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
// CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.box<none>
-// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?x?xf32>>> {
@@ -179,7 +179,7 @@ func.func @_QQallocate_stream() {
// CHECK: %[[STREAM_ALLOCA:.*]] = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"}
// CHECK: %[[STREAM:.*]] = fir.declare %[[STREAM_ALLOCA]] {uniq_name = "_QFEstream1"} : (!fir.ref<i64>) -> !fir.ref<i64>
// CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref<i64>
-// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
func.func @_QPp_alloc() {
@@ -255,4 +255,19 @@ func.func @_QMmod1Ppointer_source_global() {
// CHECK-LABEL: func.func @_QMmod1Ppointer_source_global()
// CHECK: fir.call @_FortranACUFPointerAllocateSourceSync
+func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} {
+ %0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+ %4 = fir.declare %0 {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+ %13 = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"}
+ %14 = fir.declare %13 {uniq_name = "_QFEpinnedflag"} : (!fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>>
+ %18 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> pinned(%14 : !fir.ref<!fir.logical<4>>) {data_attr = #cuf.cuda<pinned>, hasStat} -> i32
+ return
+}
+
+// CHECK-LABEL: func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} {
+// CHECK: %[[PINNED:.*]] = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"}
+// CHECK: %[[DECL_PINNED:.*]] = fir.declare %[[PINNED]] {uniq_name = "_QFEpinnedflag"} : (!fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>>
+// CHECK: %[[CONV_PINNED:.*]] = fir.convert %[[DECL_PINNED]] : (!fir.ref<!fir.logical<4>>) -> !fir.ref<i1>
+// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i64, !fir.ref<i1>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+
} // end of module
More information about the flang-commits
mailing list