[flang-commits] [flang] [flang][cuda] Add c_devloc as intrinsic and inline it during lowering (PR #120648)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu Dec 19 15:29:07 PST 2024
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/120648
None
>From ff7b608d81cad9cbcd8d08b47785c2e64a676975 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 19 Dec 2024 15:26:48 -0800
Subject: [PATCH] [flang][cuda] Add c_devloc as intrinsic and inline it during
lowering
---
.../flang/Optimizer/Builder/FIRBuilder.h | 5 ++
.../flang/Optimizer/Builder/IntrinsicCall.h | 1 +
flang/lib/Evaluate/intrinsics.cpp | 74 ++++++++++++++++++-
flang/lib/Optimizer/Builder/FIRBuilder.cpp | 19 +++++
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 18 ++++-
flang/module/__fortran_builtins.f90 | 4 +
flang/module/__fortran_type_info.f90 | 2 +-
flang/test/Lower/CUDA/cuda-cdevloc.cuf | 21 ++++++
8 files changed, 139 insertions(+), 5 deletions(-)
create mode 100644 flang/test/Lower/CUDA/cuda-cdevloc.cuf
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index b772c523caba49..ffaaa66a862a8a 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -744,6 +744,11 @@ mlir::Value genMaxWithZero(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value cPtr, mlir::Type ty);
+/// The type(C_DEVPTR) is defined as the derived type with only one
+/// component of C_PTR type. Get the C address from the C_PTR component.
+mlir::Value genCDevPtrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value cDevPtr, mlir::Type ty);
+
/// Get the C address value.
mlir::Value genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value cPtr);
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index bc0020e614db24..79c1aec9d56e25 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -214,6 +214,7 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+ fir::ExtendedValue genCDevLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genErfcScaled(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index cdea572c147576..fb0207a7685353 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -2595,6 +2595,8 @@ class IntrinsicProcTable::Implementation {
ActualArguments &, FoldingContext &) const;
std::optional<SpecificCall> HandleC_Loc(
ActualArguments &, FoldingContext &) const;
+ std::optional<SpecificCall> HandleC_Devloc(
+ ActualArguments &, FoldingContext &) const;
const std::string &ResolveAlias(const std::string &name) const {
auto iter{aliases_.find(name)};
return iter == aliases_.end() ? name : iter->second;
@@ -2622,7 +2624,8 @@ bool IntrinsicProcTable::Implementation::IsIntrinsicFunction(
return true;
}
// special cases
- return name == "__builtin_c_loc" || name == "null";
+ return name == "__builtin_c_loc" || name == "__builtin_c_devloc" ||
+ name == "null";
}
bool IntrinsicProcTable::Implementation::IsIntrinsicSubroutine(
const std::string &name0) const {
@@ -3012,6 +3015,73 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Loc(
return std::nullopt;
}
+// CUDA Fortran C_DEVLOC(x)
+std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Devloc(
+ ActualArguments &arguments, FoldingContext &context) const {
+ static const char *const keywords[]{"cptr", nullptr};
+
+ if (CheckAndRearrangeArguments(arguments, context.messages(), keywords)) {
+ CHECK(arguments.size() == 1);
+ const auto *expr{arguments[0].value().UnwrapExpr()};
+ if (auto typeAndShape{characteristics::TypeAndShape::Characterize(
+ arguments[0], context)}) {
+ if (expr && !IsContiguous(*expr, context).value_or(true)) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument must be contiguous"_err_en_US);
+ }
+ if (auto constExtents{AsConstantExtents(context, typeAndShape->shape())};
+ constExtents && GetSize(*constExtents) == 0) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument may not be a zero-sized array"_err_en_US);
+ }
+ if (!(typeAndShape->type().category() != TypeCategory::Derived ||
+ typeAndShape->type().IsAssumedType() ||
+ (!typeAndShape->type().IsPolymorphic() &&
+ CountNonConstantLenParameters(
+ typeAndShape->type().GetDerivedTypeSpec()) == 0))) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument must have an intrinsic type, assumed type, or non-polymorphic derived type with no non-constant length parameter"_err_en_US);
+ } else if (typeAndShape->type().knownLength().value_or(1) == 0) {
+ context.messages().Say(arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument may not be zero-length character"_err_en_US);
+ } else if (typeAndShape->type().category() != TypeCategory::Derived &&
+ !IsInteroperableIntrinsicType(typeAndShape->type()).value_or(true)) {
+ if (typeAndShape->type().category() == TypeCategory::Character &&
+ typeAndShape->type().kind() == 1) {
+ // Default character kind, but length is not known to be 1
+ if (context.languageFeatures().ShouldWarn(
+ common::UsageWarning::CharacterInteroperability)) {
+ context.messages().Say(
+ common::UsageWarning::CharacterInteroperability,
+ arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
+ }
+ } else if (context.languageFeatures().ShouldWarn(
+ common::UsageWarning::Interoperability)) {
+ context.messages().Say(common::UsageWarning::Interoperability,
+ arguments[0]->sourceLocation(),
+ "C_DEVLOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
+ }
+ }
+
+ characteristics::DummyDataObject ddo{std::move(*typeAndShape)};
+ ddo.intent = common::Intent::In;
+ return SpecificCall{
+ SpecificIntrinsic{"__builtin_c_devloc"s,
+ characteristics::Procedure{
+ characteristics::FunctionResult{
+ DynamicType{GetBuiltinDerivedType(
+ builtinsScope_, "__builtin_c_devptr")}},
+ characteristics::DummyArguments{
+ characteristics::DummyArgument{"cptr"s, std::move(ddo)}},
+ characteristics::Procedure::Attrs{
+ characteristics::Procedure::Attr::Pure}}},
+ std::move(arguments)};
+ }
+ }
+ return std::nullopt;
+}
+
static bool CheckForNonPositiveValues(FoldingContext &context,
const ActualArgument &arg, const std::string &procName,
const std::string &argName) {
@@ -3202,6 +3272,8 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
} else { // function
if (call.name == "__builtin_c_loc") {
return HandleC_Loc(arguments, context);
+ } else if (call.name == "__builtin_c_devloc") {
+ return HandleC_Devloc(arguments, context);
} else if (call.name == "null") {
return HandleNull(arguments, context);
}
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 3a39c455015f9f..d01becfe800937 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1626,6 +1626,25 @@ mlir::Value fir::factory::genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder,
cPtr, addrFieldIndex);
}
+mlir::Value fir::factory::genCDevPtrAddr(fir::FirOpBuilder &builder,
+ mlir::Location loc,
+ mlir::Value cDevPtr, mlir::Type ty) {
+ auto recTy = mlir::cast<fir::RecordType>(ty);
+ assert(recTy.getTypeList().size() == 1);
+ auto cptrFieldName = recTy.getTypeList()[0].first;
+ mlir::Type cptrFieldTy = recTy.getTypeList()[0].second;
+ auto fieldIndexType = fir::FieldType::get(ty.getContext());
+ mlir::Value cptrFieldIndex = builder.create<fir::FieldIndexOp>(
+ loc, fieldIndexType, cptrFieldName, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ auto cptrCoord = builder.create<fir::CoordinateOp>(
+ loc, builder.getRefType(cptrFieldTy), cDevPtr, cptrFieldIndex);
+ auto [addrFieldIndex, addrFieldTy] =
+ genCPtrOrCFunptrFieldIndex(builder, loc, cptrFieldTy);
+ return builder.create<fir::CoordinateOp>(loc, builder.getRefType(addrFieldTy),
+ cptrCoord, addrFieldIndex);
+}
+
mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::Value cPtr) {
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 547cebefd2df47..e7a3d40a5c2ff4 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -167,6 +167,7 @@ static constexpr IntrinsicHandler handlers[]{
&I::genCAssociatedCPtr,
{{{"c_ptr_1", asAddr}, {"c_ptr_2", asAddr, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"c_devloc", &I::genCDevLoc, {{{"x", asBox}}}, /*isElemental=*/false},
{"c_f_pointer",
&I::genCFPointer,
{{{"cptr", asValue},
@@ -2828,11 +2829,14 @@ static mlir::Value getAddrFromBox(fir::FirOpBuilder &builder,
static fir::ExtendedValue
genCLocOrCFunLoc(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args,
- bool isFunc = false) {
+ bool isFunc = false, bool isDevLoc = false) {
assert(args.size() == 1);
mlir::Value res = builder.create<fir::AllocaOp>(loc, resultType);
- mlir::Value resAddr =
- fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
+ mlir::Value resAddr;
+ if (isDevLoc)
+ resAddr = fir::factory::genCDevPtrAddr(builder, loc, res, resultType);
+ else
+ resAddr = fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
assert(fir::isa_box_type(fir::getBase(args[0]).getType()) &&
"argument must have been lowered to box type");
mlir::Value argAddr = getAddrFromBox(builder, loc, args[0], isFunc);
@@ -2889,6 +2893,14 @@ IntrinsicLibrary::genCAssociatedCPtr(mlir::Type resultType,
return genCAssociated(builder, loc, resultType, args);
}
+// C_DEVLOC
+fir::ExtendedValue
+IntrinsicLibrary::genCDevLoc(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ return genCLocOrCFunLoc(builder, loc, resultType, args, /*isFunc=*/false,
+ /*isDevLoc=*/true);
+}
+
// C_F_POINTER
void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90
index ef206dfd943102..ab12d6c3089c59 100644
--- a/flang/module/__fortran_builtins.f90
+++ b/flang/module/__fortran_builtins.f90
@@ -22,6 +22,9 @@
intrinsic :: __builtin_c_loc
public :: __builtin_c_loc
+ intrinsic :: __builtin_c_devloc
+ public :: __builtin_c_devloc
+
intrinsic :: __builtin_c_f_pointer
public :: __builtin_c_f_pointer
@@ -144,6 +147,7 @@
type :: __force_derived_type_instantiations
type(__builtin_c_ptr) :: c_ptr
+ type(__builtin_c_devptr) :: c_devptr
type(__builtin_c_funptr) :: c_funptr
type(__builtin_event_type) :: event_type
type(__builtin_lock_type) :: lock_type
diff --git a/flang/module/__fortran_type_info.f90 b/flang/module/__fortran_type_info.f90
index 5f2273de1e3d1e..b30a6bf6975638 100644
--- a/flang/module/__fortran_type_info.f90
+++ b/flang/module/__fortran_type_info.f90
@@ -14,7 +14,7 @@
module __fortran_type_info
use, intrinsic :: __fortran_builtins, &
- only: __builtin_c_ptr, __builtin_c_funptr
+ only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr
implicit none
! Set PRIVATE by default to explicitly only export what is meant
diff --git a/flang/test/Lower/CUDA/cuda-cdevloc.cuf b/flang/test/Lower/CUDA/cuda-cdevloc.cuf
new file mode 100644
index 00000000000000..a71490207909a8
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-cdevloc.cuf
@@ -0,0 +1,21 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+attributes(global) subroutine testcdevloc(a)
+ use __fortran_builtins, only: c_devloc => __builtin_c_devloc
+ integer, device :: a(10)
+ print*, c_devloc(a(1))
+end
+
+! CHECK-LABEL: func.func @_QPtestcdevloc(
+! CHECK-SAME: %[[A_ARG:.*]]: !fir.ref<!fir.array<10xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestcdevlocEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref<i32>) -> !fir.box<i32>
+! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
+! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
+! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, !fir.field) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
+! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
+! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
+! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<i32>) -> !fir.ref<i32>
+! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref<i32>) -> i64
+! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref<i64>
More information about the flang-commits
mailing list