[flang-commits] [clang] [flang] [flang] Added driver options for arrays repacking. (PR #134002)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Tue Apr 1 16:07:30 PDT 2025
https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/134002
Added options:
* -f[no-]repack-arrays
* -f[no-]stack-repack-arrays
* -frepack-arrays-contiguity=whole/innermost
>From b936044f9a77cb717d74248cad5021b5d997d407 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 1 Apr 2025 15:55:30 -0700
Subject: [PATCH] [flang] Added driver options for arrays repacking.
Added options:
* -f[no-]repack-arrays
* -f[no-]stack-repack-arrays
* -frepack-arrays-contiguity=whole/innermost
---
clang/include/clang/Driver/Options.td | 62 ++++++++++++++++++-
clang/lib/Driver/ToolChains/Flang.cpp | 28 ++++++---
flang/docs/ArrayRepacking.md | 19 +++---
flang/include/flang/Lower/LoweringOptions.def | 5 ++
flang/lib/Frontend/CompilerInvocation.cpp | 14 +++++
flang/lib/Lower/ConvertVariable.cpp | 2 +-
.../test/Driver/frepack-arrays-contiguity.f90 | 27 ++++++++
flang/test/Driver/frepack-arrays.f90 | 24 +++++++
flang/test/Driver/fstack-repack-arrays.f90 | 24 +++++++
flang/test/Lower/repack-arrays.f90 | 8 +--
flang/tools/bbc/bbc.cpp | 12 +++-
11 files changed, 198 insertions(+), 27 deletions(-)
create mode 100644 flang/test/Driver/frepack-arrays-contiguity.f90
create mode 100644 flang/test/Driver/frepack-arrays.f90
create mode 100644 flang/test/Driver/fstack-repack-arrays.f90
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 89cb03cc33b98..49a59c0417455 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6819,7 +6819,6 @@ defm real_8_real_10 : BooleanFFlag<"real-8-real-10">, Group<gfortran_Group>;
defm real_8_real_16 : BooleanFFlag<"real-8-real-16">, Group<gfortran_Group>;
defm real_8_real_4 : BooleanFFlag<"real-8-real-4">, Group<gfortran_Group>;
defm recursive : BooleanFFlag<"recursive">, Group<gfortran_Group>;
-defm repack_arrays : BooleanFFlag<"repack-arrays">, Group<gfortran_Group>;
defm second_underscore : BooleanFFlag<"second-underscore">, Group<gfortran_Group>;
defm sign_zero : BooleanFFlag<"sign-zero">, Group<gfortran_Group>;
defm whole_file : BooleanFFlag<"whole-file">, Group<gfortran_Group>;
@@ -6961,6 +6960,51 @@ defm unsigned : OptInFC1FFlag<"unsigned", "Enables UNSIGNED type">;
def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">;
+defm repack_arrays
+ : BoolOptionWithoutMarshalling<
+ "f", "repack-arrays", PosFlag<SetTrue, [], [], "Pack">,
+ NegFlag<SetFalse, [], [], "Do not pack">,
+ BothFlags<[], [],
+ " non-contiguous assumed shape dummy arrays into "
+ "contiguous memory">>,
+ DocBrief<[{Create temporary copies of non-contiguous assumed shape dummy
+arrays in subprogram prologues, and destroy them in subprogram epilogues.
+The temporary copy is initialized with values from the original array
+in the prologue, if needed. In the epilogue, the current values
+in the temporary array are copied into the original array, if needed.
+
+Accessing the contiguous temporary in the program code may result
+in faster execution compared to accessing elements of the original array,
+when they are sparse in memory. At the same time, the overhead
+of copying values between the original and the temporary arrays
+may be significant, which may slow down some programs.
+
+Enabling array repacking may also change the behavior of certain
+programs:
+* The copy actions may introduce a data race in valid OpenACC/OpenMP programs.
+ For example, if different threads execute the same subprogram
+ with a non-contiguous assumed shape dummy array, and the different threads
+ access unrelated parts of the array, then the whole array copy
+ made in each thread will cause a data race.
+* OpenACC/OpenMP offload programs may behave incorrectly with regards
+ to the device data environment, due to the fact that the original
+ array and the temporary may have different presence status on the device.
+* ``IS_CONTIGUOUS`` intrinsic may return ``TRUE`` with the array repacking
+ enabled, whereas it would return ``FALSE`` with the repacking disabled.
+* The result of ``LOC`` intrinsic applied to an actual argument associated
+ with a non-contiguous assumed shape dummy array, may be different
+ from the result of ``LOC`` applied to the dummy array.}]>;
+
+def frepack_arrays_contiguity_EQ
+ : Joined<["-"], "frepack-arrays-contiguity=">,
+ Group<f_Group>,
+ Values<"whole,innermost">,
+ HelpText<
+ "When -frepack-arrays is in effect, 'whole' enables "
+ "repacking for arrays that are non-contiguous in any dimension, "
+ "'innermost' enables repacking for arrays that are non-contiguous "
+ "in the innermost dimension (the default)">;
+
defm save_main_program : BoolOptionWithoutMarshalling<"f", "save-main-program",
PosFlag<SetTrue, [], [],
"Place all main program variables in static memory (otherwise scalars may be placed on the stack)">,
@@ -6974,6 +7018,22 @@ defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stri
PosFlag<SetTrue, [], [ClangOption], "Create unit-strided versions of loops">,
NegFlag<SetFalse, [], [ClangOption], "Do not create unit-strided loops (default)">>;
+defm stack_repack_arrays
+ : BoolOptionWithoutMarshalling<
+ "f", "stack-repack-arrays",
+ PosFlag<SetTrue, [], [],
+ "Attempt to allocate array temporaries created under "
+ "-frepack-arrays on the stack">,
+ NegFlag<
+ SetFalse, [], [],
+ "Allocate -frepack-arrays temporaries on the heap (default)">>,
+ DocBrief<[{Controls whether the array temporaries created under
+**-frepack-arrays** are allocated on the stack or on the heap.
+
+By default, the heap is used. Allocations of the polymorphic types
+are always done on the heap, though this may change in future releases.
+ }]>;
+
def fhermetic_module_files : Flag<["-"], "fhermetic-module-files">, Group<f_Group>,
HelpText<"Emit hermetic module files (no nested USE association)">;
} // let Visibility = [FC1Option, FlangOption]
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index a44513a83a2d7..ef451eb618a05 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -157,15 +157,25 @@ void Flang::addCodegenOptions(const ArgList &Args,
if (shouldLoopVersion(Args))
CmdArgs.push_back("-fversion-loops-for-stride");
- Args.addAllArgs(CmdArgs,
- {options::OPT_flang_experimental_hlfir,
- options::OPT_flang_deprecated_no_hlfir,
- options::OPT_fno_ppc_native_vec_elem_order,
- options::OPT_fppc_native_vec_elem_order,
- options::OPT_finit_global_zero,
- options::OPT_fno_init_global_zero, options::OPT_ftime_report,
- options::OPT_ftime_report_EQ, options::OPT_funroll_loops,
- options::OPT_fno_unroll_loops});
+ for (const auto &arg :
+ Args.getAllArgValues(options::OPT_frepack_arrays_contiguity_EQ))
+ if (arg.compare("whole") != 0 && arg.compare("innermost") != 0) {
+ getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument)
+ << "-frepack-arrays-contiguity=" << arg;
+ }
+
+ Args.addAllArgs(
+ CmdArgs,
+ {options::OPT_flang_experimental_hlfir,
+ options::OPT_flang_deprecated_no_hlfir,
+ options::OPT_fno_ppc_native_vec_elem_order,
+ options::OPT_fppc_native_vec_elem_order, options::OPT_finit_global_zero,
+ options::OPT_fno_init_global_zero, options::OPT_frepack_arrays,
+ options::OPT_fno_repack_arrays,
+ options::OPT_frepack_arrays_contiguity_EQ,
+ options::OPT_fstack_repack_arrays, options::OPT_fno_stack_repack_arrays,
+ options::OPT_ftime_report, options::OPT_ftime_report_EQ,
+ options::OPT_funroll_loops, options::OPT_fno_unroll_loops});
}
void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
diff --git a/flang/docs/ArrayRepacking.md b/flang/docs/ArrayRepacking.md
index 87cfc5d1bb4bc..7de599f293e40 100755
--- a/flang/docs/ArrayRepacking.md
+++ b/flang/docs/ArrayRepacking.md
@@ -39,13 +39,13 @@ Having these results it seems reasonable to provide support for arrays repacking
#### Facts and guesses about the implementation
-The dynamic checks for continuity and the array copy code is located completely in the [runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35), so the compiler inserts unconditional calls in the subprogram prologue/epilogue.
+The dynamic checks for contiguity and the array copy code is located completely in the [runtime](https://github.com/gcc-mirror/gcc/blob/3e08a4ecea27c54fda90e8f58641b1986ad957e1/libgfortran/generated/in_pack_r8.c#L35), so the compiler inserts unconditional calls in the subprogram prologue/epilogue.
It looks like `gfortran` ignores `intent(out)/intent(in)` which could have helped to avoid some of the `pack/unpack` overhead.
It looks like the `pack`/`unpack` actions are inserted early in the compilation pipeline, and these extra calls affect behavior of the later optimization passes. For example, `Polyhedron/fatigue2` slows down by about 2x with `-frepack-arrays`: this slowdown is not caused by the `pack`/`unpack` overhead, but is a consequence of worse function inlining decisions made after the calls insertion. The benchmarks becomes even faster than the original version with `-frepack-arrays` and proper `-finline-limit=` settings, but it does not look like the benchmark contains code that would benefit from the array repacking.
-It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code after the function inlining, if the actual argument is statically known to be contiguous. So the overhead from the dynamic continuity checks is inevitable when `-frepack-arrays` is specified.
+It does not look like `gfortran` is able to eliminate the `pack`/`unpack` code after the function inlining, if the actual argument is statically known to be contiguous. So the overhead from the dynamic contiguity checks is inevitable when `-frepack-arrays` is specified.
It does not look like `gfortran` tries to optimize the insertion of `pack`/`unpack` code. For example, if a dummy array is only used under a condition within the subprogram, the repacking code might be inserted under the same condition to minimize the overhead on the unconditional path through the subprogram.
@@ -59,7 +59,7 @@ It does not look like `gfortran` tries to optimize the insertion of `pack`/`unpa
#### Facts and guesses about the implementation
-The `pack` code is only generated if the actual argument may be non-contiguous in the innermost dimension, as determined statically, i.e. the compiler does not generate any dynamic continuity checks. For example:
+The `pack` code is only generated if the actual argument may be non-contiguous in the innermost dimension, as determined statically, i.e. the compiler does not generate any dynamic contiguity checks. For example:
```Fortran
interface
@@ -132,8 +132,8 @@ So it does not seem practical/reasonable to enable the array repacking by defaul
### Performance
1. Minimize the overhead of array repacking, e.g. avoid copy-in/out whenever possible, execute copy-in/out only on the execution paths where the array is accessed.
-2. Provide different modes of repacking depending on the "continuity" meaning, i.e. one - array is contiguous in the innermost dimension, two - array is contiguous in all dimensions.
-3. Avoid generating repacking code, when the "continuity" can be statically proven (including after optimization passes like constant propagation, function inlining, etc.).
+2. Provide different modes of repacking depending on the "contiguity" meaning, i.e. one - array is contiguous in the innermost dimension, two - array is contiguous in all dimensions.
+3. Avoid generating repacking code, when the "contiguity" can be statically proven (including after optimization passes like constant propagation, function inlining, etc.).
4. Use a set of heuristics to avoid generating repacking code based on the array usage pattern, e.g. if an array is proven not to be used in an array expression or a loop, etc.
5. Use a set of heuristics to avoid repacking actions dynamically, e.g. based on the array size, element size, byte stride(s) of the [innermost] dimension(s), etc.
6. Minimize the impact of the IR changes, introduced by repacking, on the later optimization passes.
@@ -156,7 +156,7 @@ Controlled by cli options, Lowering will generate a `fir.pack_array` operation i
The new operations will hold all the information that customizes further handling of the `pack`/`unpack` actions, such as:
* Optional array of attributes supporting an interface to generate a predicate that says if the repacking is safe in the current context.
-* The continuity mode: `innermost` vs `whole`.
+* The contiguity mode: `innermost` vs `whole`.
* Attributes selecting the heuristics (both compiler and runtime ones) that may be applied to avoid `pack`/`unpack` actions.
* Other attributes, like `stack` vs `heap` to manage the temporary allocation according to `-fstack-arrays`, etc.
@@ -195,7 +195,7 @@ The operation creates a new `!fir.box/class<!fir.array<>>` value to represent ei
Arguments:
* `stack` - indicates if `-fstack-arrays` is in effect for compiling this function.
-* `innermost` - tells that the repacking has to be done iff the array is not contiguous in the innermost dimension. This also describes what type of continuity can be expected from `%new_var`, i.e. `innermost` means that the resulting array is definitely contiguous in the innermost dimension, but may be non-contiguous in other dimensions (unless additional analysis proves otherwise). For 1-D arrays, `innermost` attribute is not valid.
+* `innermost` - tells that the repacking has to be done iff the array is not contiguous in the innermost dimension. This also describes what type of contiguity can be expected from `%new_var`, i.e. `innermost` means that the resulting array is definitely contiguous in the innermost dimension, but may be non-contiguous in other dimensions (unless additional analysis proves otherwise). For 1-D arrays, `innermost` attribute is not valid.
* `no_copy` - indicates that, in case a temporary array is created, `%var` to `%new_var` copy is not required (`intent(out)` dummy argument case).
* `heuristics`
* `loop-only` - `fir.pack_array` can be optimized away, if the array is not used in a loop.
@@ -351,7 +351,7 @@ The `fir.pack_array`'s copy-in action cannot be skipped for `INTENT(OUT)` dummy
#### Optional behavior
-In case of the `whole` continuity mode or with 1-D array, Flang can propagate this information to `hlfir.declare` - this may improve optimizations down the road. This can be done iff the repacking has no dynamic constraints and/or heuristics. For example:
+In case of the `whole` contiguity mode or with 1-D array, Flang can propagate this information to `hlfir.declare` - this may improve optimizations down the road. This can be done iff the repacking has no dynamic constraints and/or heuristics. For example:
```
%c0 = arith.constant 0 : index
@@ -441,10 +441,11 @@ In cases where `fir.pack_array` is statically known to produce a copy that is co
The following user options are proposed:
* `-frepack-arrays` - the option forces Flang to repack a non-contiguous assumed-shape dummy array into a temporary contiguous memory, which may result in faster accesses of the array. The compiler will insert special code in subprogram prologue to allocate a temporary array and copy the original array into the temporary; in subprogram epilogue, it will insert a copy from the temporary array into the original array and deallocate the temporary. The overhead of the allocation/deallocation and the copies may be significant depending on the array size. The compiler will try to optimize the unnecessary/unprofitable repacking.
+* `-fstack-repack-arrays` - attempt allocating the temporary arrays in stack memory. By default, they are allocated in heap memory (note that `-fstack-arrays` does not affect the allocation of the temporaries created for the arrays repacking).
* `-frepack-arrays-opts=[none|loop-only]` - the option enables optimizations that may eliminate the array repacking code depending on the array usage pattern:
* `none` - no optimizations.
* `loop-only` - the array repacking code will be removed in any subprogram where the array is not used inside a loop or an array expression.
-* `-frepack-arrays-continuity=[whole|innermost]`:
+* `-frepack-arrays-contiguity=[whole|innermost]`:
* `whole` - the option will repack arrays that are non-contiguous in any dimension (default).
* `innermost` - the option will repack arrays that are non-contiguous in the innermost dimension.
* `-frepack-arrays-max-size=<int>` - arrays bigger than the specified size will not be repacked.
diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def
index 6735bea551414..d98823a0e3341 100644
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -56,6 +56,11 @@ ENUM_LOWERINGOPT(StackArrays, unsigned, 1, 0)
/// packed into contiguous memory.
ENUM_LOWERINGOPT(RepackArrays, unsigned, 1, 0)
+/// If true, the temporary arrays created under RepackArrays
+/// control will be allocated in stack memory. If false,
+/// they will be allocated in heap memory.
+ENUM_LOWERINGOPT(StackRepackArrays, unsigned, 1, 0)
+
/// If true, the repacking (RepackArrays option above)
/// will be done for arrays non-contiguous in any dimension,
/// otherwise, it will be done only for arrays non-contiguous
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 229695b18d278..4c3b63f07b190 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1448,6 +1448,19 @@ bool CompilerInvocation::createFromArgs(
clang::driver::options::OPT_fno_realloc_lhs, true))
invoc.loweringOpts.setReallocateLHS(false);
+ invoc.loweringOpts.setRepackArrays(
+ args.hasFlag(clang::driver::options::OPT_frepack_arrays,
+ clang::driver::options::OPT_fno_repack_arrays,
+ /*default=*/false));
+ invoc.loweringOpts.setStackRepackArrays(
+ args.hasFlag(clang::driver::options::OPT_fstack_repack_arrays,
+ clang::driver::options::OPT_fno_stack_repack_arrays,
+ /*default=*/false));
+ if (auto *arg = args.getLastArg(
+ clang::driver::options::OPT_frepack_arrays_contiguity_EQ))
+ invoc.loweringOpts.setRepackArraysWhole(arg->getValue() ==
+ llvm::StringRef{"whole"});
+
success &= parseFrontendArgs(invoc.getFrontendOpts(), args, diags);
parseTargetArgs(invoc.getTargetOpts(), args);
parsePreprocessorArgs(invoc.getPreprocessorOpts(), args);
@@ -1687,6 +1700,7 @@ void CompilerInvocation::setLoweringOptions() {
const Fortran::common::LangOptions &langOptions = getLangOpts();
loweringOpts.setIntegerWrapAround(langOptions.getSignedOverflowBehavior() ==
Fortran::common::LangOptions::SOB_Defined);
+ loweringOpts.setStackArrays(codegenOpts.StackArrays);
Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions();
// TODO: when LangOptions are finalized, we can represent
// the math related options using Fortran::commmon::MathOptionsBase,
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 0b22b743edee9..366ff328bfa27 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -2630,7 +2630,7 @@ Fortran::lower::genPackArray(Fortran::lower::AbstractConverter &converter,
});
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
const mlir::Location loc = genLocation(converter, sym);
- bool stackAlloc = opts.getStackArrays();
+ bool stackAlloc = opts.getStackRepackArrays();
// 1D arrays must always use 'whole' mode.
bool isInnermostMode = !opts.getRepackArraysWhole() && sym.Rank() > 1;
// Avoid copy-in for 'intent(out)' variable, unless this is a dummy
diff --git a/flang/test/Driver/frepack-arrays-contiguity.f90 b/flang/test/Driver/frepack-arrays-contiguity.f90
new file mode 100644
index 0000000000000..d642cdac598af
--- /dev/null
+++ b/flang/test/Driver/frepack-arrays-contiguity.f90
@@ -0,0 +1,27 @@
+! Test just the forwarding of -frepack-arrays-contiguity options:
+! RUN: %flang -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s
+! RUN: %flang -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s
+! RUN: %flang -frepack-arrays-contiguity=innermost -frepack-arrays-contiguity=whole %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=WHOLECMD %s
+! RUN: %flang -frepack-arrays-contiguity=whole -frepack-arrays-contiguity=innermost %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=INNERMOSTCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | FileCheck --check-prefix=WHOLE %s
+! RUN: %flang_fc1 -frepack-arrays-contiguity=whole %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -frepack-arrays -frepack-arrays-contiguity=innermost %s -emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s
+! RUN: %flang_fc1 -frepack-arrays-contiguity=innermost %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s
+
+! Default setting is 'innermost':
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=INNERMOST %s
+
+! WHOLECMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=whole"
+! INNERMOSTCMD: "-fc1"{{.*}}"-frepack-arrays-contiguity=innermost"
+
+subroutine test(x)
+ real :: x(:,:)
+ ! WHOLE: fir.pack_array{{.*}}whole
+ ! WHOLE: fir.unpack_array
+ ! INNERMOST: fir.pack_array{{.*}}innermost
+ ! INNERMOST: fir.unpack_array
+ ! NOREPACK-NOT: fir.pack_array
+ ! NOREPACK-NOT: fir.unpack_array
+end subroutine
diff --git a/flang/test/Driver/frepack-arrays.f90 b/flang/test/Driver/frepack-arrays.f90
new file mode 100644
index 0000000000000..ad96a14f74378
--- /dev/null
+++ b/flang/test/Driver/frepack-arrays.f90
@@ -0,0 +1,24 @@
+! Test just the forwarding of -f[no-]repack-arrays options:
+! RUN: %flang -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=REPACKCMD %s
+! RUN: %flang -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s
+! RUN: %flang -frepack-arrays -fno-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=NOREPACKCMD %s
+! RUN: %flang -fno-repack-arrays -frepack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=REPACKCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=REPACK %s
+! RUN: %flang_fc1 -fno-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -frepack-arrays -fno-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=NOREPACK %s
+! RUN: %flang_fc1 -fno-repack-arrays -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=REPACK %s
+
+! REPACKCMD: "-fc1"{{.*}}"-frepack-arrays"
+! REPACKCMD-NOT: -fno-repack-arrays
+! NOREPACKCMD: "-fc1"{{.*}}"-fno-repack-arrays"
+! NOREPACKCMD-NOT: -frepack-arrays
+
+subroutine test(x)
+ real :: x(:)
+ ! REPACK: fir.pack_array
+ ! REPACK: fir.unpack_array
+ ! NOREPACK-NOT: fir.pack_array
+ ! NOREPACK-NOT: fir.unpack_array
+end subroutine
diff --git a/flang/test/Driver/fstack-repack-arrays.f90 b/flang/test/Driver/fstack-repack-arrays.f90
new file mode 100644
index 0000000000000..4fb5aa0c18032
--- /dev/null
+++ b/flang/test/Driver/fstack-repack-arrays.f90
@@ -0,0 +1,24 @@
+! Test just the forwarding of -f[no-]stack-repack-arrays options:
+! RUN: %flang -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s
+! RUN: %flang -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s
+! RUN: %flang -fstack-repack-arrays -fno-stack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s
+! RUN: %flang -fno-stack-repack-arrays -fstack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=HEAPCMD %s
+! RUN: %flang -fno-stack-repack-arrays -fstack-repack-arrays %s -### -fsyntax-only 2>&1 | FileCheck --check-prefix=STACKCMD %s
+
+! Test proper setting of the lowering options:
+! RUN: %flang_fc1 -frepack-arrays -fstack-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=STACK %s
+! RUN: %flang_fc1 -frepack-arrays -fno-stack-repack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=HEAP %s
+
+! Default setting is 'heap':
+! RUN: %flang_fc1 -frepack-arrays %s -emit-hlfir -o - | FileCheck --check-prefix=HEAP %s
+
+! STACKCMD: "-fc1"{{.*}}"-fstack-repack-arrays"
+! HEAPCMD: "-fc1"{{.*}}"-fno-stack-repack-arrays"
+
+subroutine test(x)
+ real :: x(:,:)
+ ! STACK: fir.pack_array{{.*}}stack
+ ! STACK: fir.unpack_array{{.*}}stack
+ ! HEAP: fir.pack_array{{.*}}heap
+ ! HEAP: fir.unpack_array{{.*}}heap
+end subroutine
diff --git a/flang/test/Lower/repack-arrays.f90 b/flang/test/Lower/repack-arrays.f90
index 19ea93a3521a3..ff89df82793a3 100644
--- a/flang/test/Lower/repack-arrays.f90
+++ b/flang/test/Lower/repack-arrays.f90
@@ -1,7 +1,7 @@
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,WHOLE %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,WHOLE %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,INNER %s
-! RUN: bbc -emit-hlfir -frepack-arrays -fstack-arrays=false -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,INNER %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,WHOLE %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false -frepack-arrays-continuity-whole %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,WHOLE %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,STACK,INNER %s
+! RUN: bbc -emit-hlfir -frepack-arrays -fstack-repack-arrays=false -frepack-arrays-continuity-whole=false %s -o - -I nowhere | FileCheck --check-prefixes=ALL,HEAP,INNER %s
! ALL-LABEL: func.func @_QPtest1(
! ALL-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 2cc75b7aa4e87..4cf9074fc93c4 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -245,15 +245,20 @@ static llvm::cl::opt<bool>
"the LHS of the intrinsic assignment"),
llvm::cl::init(true));
-// TODO: -fstack-arrays is currently only used for fir.pack_array,
-// but it should probably be used for deciding how arrays/temporaries
-// are allocated during lowering.
+// TODO: -fstack-arrays is currently unused, but it should probably be used
+// for deciding how arrays/temporaries are allocated during lowering.
static llvm::cl::opt<bool>
stackArrays("fstack-arrays",
llvm::cl::desc("Allocate all arrays of unknown size and "
"temporary arrays in stack memory"),
llvm::cl::init(false));
+static llvm::cl::opt<bool> stackRepackArrays(
+ "fstack-repack-arrays",
+ llvm::cl::desc("Allocate temporary arrays for -frepack-arrays "
+ "in stack memory"),
+ llvm::cl::init(false));
+
static llvm::cl::opt<bool>
repackArrays("frepack-arrays",
llvm::cl::desc("Pack non-contiguous assummed shape arrays "
@@ -412,6 +417,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
loweringOptions.setInitGlobalZero(initGlobalZero);
loweringOptions.setReallocateLHS(reallocateLHS);
loweringOptions.setStackArrays(stackArrays);
+ loweringOptions.setStackRepackArrays(stackRepackArrays);
loweringOptions.setRepackArrays(repackArrays);
loweringOptions.setRepackArraysWhole(repackArraysWhole);
std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};
More information about the flang-commits
mailing list