[flang-commits] [flang] [flang] Rely on global initialization for simpler derived types (PR #114002)
via flang-commits
flang-commits at lists.llvm.org
Mon Oct 28 21:51:04 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
Author: None (NimishMishra)
<details>
<summary>Changes</summary>
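Currently, all derived types are initialized through `_FortranAInitialize`, which is functionally correct but has poor runtime performance. This patch instead relies on global initialization for "simpler" derived types to speed up initialization: for a non-aliased, non-array derived-type variable that has no allocatable direct components and is not OpenMP private/firstprivate, lowering now emits a `<mangled-name>_globalinit` global and copies it into the variable with a load/store. All other cases still call `_FortranAInitialize`.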
---
Full diff: https://github.com/llvm/llvm-project/pull/114002.diff
6 Files Affected:
- (modified) flang/include/flang/Lower/ConvertVariable.h (+1-1)
- (modified) flang/lib/Lower/ConvertVariable.cpp (+31-8)
- (modified) flang/lib/Lower/OpenMP/DataSharingProcessor.cpp (+2-1)
- (modified) flang/test/Lower/HLFIR/structure-constructor.f90 (+3-6)
- (modified) flang/test/Lower/default-initialization.f90 (+9-9)
- (modified) flang/test/Lower/pointer-default-init.f90 (+3-1)
``````````diff
diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h
index de394a39e112ed..ac285c846fc7d1 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -67,7 +67,7 @@ bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym);
/// Call default initialization runtime routine to initialize \p var.
void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym,
+ const Fortran::lower::pft::Variable &var,
Fortran::lower::SymMap &symMap);
/// Create a fir::GlobalOp given a module variable definition. This is intended
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index cc51d5a9bb8daf..c261d3b6c10fb8 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -776,9 +776,10 @@ mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) {
/// Call default initialization runtime routine to initialize \p var.
void Fortran::lower::defaultInitializeAtRuntime(
Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) {
+ const Fortran::lower::pft::Variable &var, Fortran::lower::SymMap &symMap) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Location loc = converter.getCurrentLocation();
+ const Fortran::semantics::Symbol &sym = var.getSymbol();
fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap);
if (Fortran::semantics::IsOptional(sym)) {
// 15.5.2.12 point 3, absent optional dummies are not initialized.
@@ -793,11 +794,35 @@ void Fortran::lower::defaultInitializeAtRuntime(
})
.end();
} else {
- mlir::Value box = builder.createBox(loc, exv);
- fir::runtime::genDerivedTypeInitialize(builder, loc, box);
+ /// For "simpler" types, relying on "_FortranAInitialize"
+ /// leads to poor runtime performance. Hence optimize
+ /// the same.
+ const Fortran::semantics::DeclTypeSpec *declTy = sym.GetType();
+ mlir::Type symTy = converter.genType(var);
+ if (!var.isAlias() && !hasAllocatableDirectComponent(sym) &&
+ declTy->category() ==
+ Fortran::semantics::DeclTypeSpec::Category::TypeDerived &&
+ !mlir::isa<fir::SequenceType>(symTy) &&
+ !sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) &&
+ !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
+ std::string globalName = converter.mangleName(sym) + "_globalinit";
+ mlir::Location loc = genLocation(converter, sym);
+ mlir::StringAttr linkage = getLinkageAttribute(builder, var);
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
+ sym);
+ fir::GlobalOp global =
+ defineGlobal(converter, var, globalName, linkage, dataAttr);
+ auto addrOf = builder.create<fir::AddrOfOp>(loc, global.resultType(),
+ global.getSymbol());
+ fir::LoadOp load = builder.create<fir::LoadOp>(loc, addrOf.getResult());
+ builder.create<fir::StoreOp>(loc, load, fir::getBase(exv));
+ } else {
+ mlir::Value box = builder.createBox(loc, exv);
+ fir::runtime::genDerivedTypeInitialize(builder, loc, box);
+ }
}
}
-
enum class VariableCleanUp { Finalize, Deallocate };
/// Check whether a local variable needs to be finalized according to clause
/// 7.5.6.3 point 3 or if it is an allocatable that must be deallocated. Note
@@ -943,8 +968,7 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
if (needDummyIntentoutFinalization(var))
finalizeAtRuntime(converter, var, symMap);
if (mustBeDefaultInitializedAtRuntime(var))
- Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
- symMap);
+ Fortran::lower::defaultInitializeAtRuntime(converter, var, symMap);
if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
auto *builder = &converter.getFirOpBuilder();
mlir::Location loc = converter.getCurrentLocation();
@@ -1185,8 +1209,7 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter,
// do not try optimizing this to single default initializations of
// the equivalenced storages. Keep lowering simple.
if (mustBeDefaultInitializedAtRuntime(var))
- Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
- symMap);
+ Fortran::lower::defaultInitializeAtRuntime(converter, var, symMap);
}
//===--------------------------------------------------------------===//
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 709ac402cc702d..ba8b7177953bb5 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -118,7 +118,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {
bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
if (!isFirstPrivate &&
Fortran::lower::hasDefaultInitialization(sym->GetUltimate()))
- Fortran::lower::defaultInitializeAtRuntime(converter, *sym, *symTable);
+ Fortran::lower::defaultInitializeAtRuntime(converter, pft::Variable{*sym},
+ *symTable);
}
void DataSharingProcessor::copyFirstPrivateSymbol(
diff --git a/flang/test/Lower/HLFIR/structure-constructor.f90 b/flang/test/Lower/HLFIR/structure-constructor.f90
index 41d08c14f5fa98..68a29015f60177 100644
--- a/flang/test/Lower/HLFIR/structure-constructor.f90
+++ b/flang/test/Lower/HLFIR/structure-constructor.f90
@@ -98,12 +98,9 @@ end subroutine test3
! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}> {bindc_name = "res", uniq_name = "_QFtest3Eres"}
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtest3Eres"} : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>)
-! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1 : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
-! CHECK: %[[VAL_5:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref<!fir.char<1,{{[0-9]*}}>>
-! CHECK: %[[VAL_6:.*]] = arith.constant {{[0-9]*}} : i32
-! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_4]] : (!fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<none>
-! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.char<1,{{[0-9]*}}>>) -> !fir.ref<i8>
-! CHECK: %[[VAL_9:.*]] = fir.call @_FortranAInitialize(%[[VAL_7]], %[[VAL_8]], %[[VAL_6]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK: %[[ADDR:.*]] = fir.address_of(@_QFtest3Eres_globalinit) : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: fir.store %[[LOADED_VAL]] to %[[VAL_3]]#1 : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest3Ex"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "ctor.temp"} : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>)
! CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_11]]#0 : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
diff --git a/flang/test/Lower/default-initialization.f90 b/flang/test/Lower/default-initialization.f90
index 7a6133452b3a25..f6e37d57f19eb4 100644
--- a/flang/test/Lower/default-initialization.f90
+++ b/flang/test/Lower/default-initialization.f90
@@ -22,9 +22,9 @@ module test_dinit
! CHECK-LABEL: func @_QMtest_dinitPlocal()
subroutine local
! CHECK: %[[x:.*]] = fir.alloca !fir.type<_QMtest_dinitTt{i:i32}>
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFlocalEx_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t) :: x
print *, x%i
end subroutine
@@ -56,9 +56,9 @@ subroutine local_alloc_comp
! CHECK-LABEL: func @_QMtest_dinitPresult() -> !fir.type<_QMtest_dinitTt{i:i32}>
function result()
! CHECK: %[[x:.*]] = fir.alloca !fir.type<_QMtest_dinitTt{i:i32}>
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFresultEresult_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t) :: result
end function
@@ -66,9 +66,9 @@ function result()
! CHECK-LABEL: func @_QMtest_dinitPintent_out(
! CHECK-SAME: %[[x:.*]]: !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
subroutine intent_out(x)
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFintent_outEx_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t), intent(out) :: x
end subroutine
diff --git a/flang/test/Lower/pointer-default-init.f90 b/flang/test/Lower/pointer-default-init.f90
index 0fb42683a3486b..0e97f3bea90024 100644
--- a/flang/test/Lower/pointer-default-init.f90
+++ b/flang/test/Lower/pointer-default-init.f90
@@ -38,7 +38,9 @@ subroutine test_local()
type(t) :: x
end subroutine
! CHECK-LABEL: func.func @_QPtest_local() {
-! CHECK: fir.call @_FortranAInitialize(
+! CHECK: %[[ADDR:.*]] = fir.address_of(@_QFtest_localEx_globalinit) : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: %[[LOAD:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: fir.store %[[LOAD]] to {{.*}} : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
subroutine test_saved()
use test, only : t
``````````
</details>
https://github.com/llvm/llvm-project/pull/114002
More information about the flang-commits mailing list.