[flang-commits] [flang] [flang][lowering] delay stack save/restore emission in elemental calls (PR #109142)

via flang-commits flang-commits at lists.llvm.org
Wed Sep 18 06:47:20 PDT 2024


https://github.com/jeanPerier created https://github.com/llvm/llvm-project/pull/109142

The stack save/restore emitted in lowering for character elemental function result allocation inside hlfir.elemental created memory bugs: the result memory is actually still used after the stack restore, because when the elemental is lowered into a loop, the result element is copied into the array result storage.

Instead of adding special handling for stack save/restore in lowering, just avoid emitting them, since the stack reclaim pass is able to emit them in the generated loop. Omitting the stack save/restore will also help optimizations that want to elide the temporary allocation for the element result when that is possible.

>From d5435bde2823ba9c6bcd580b86916d38e9cd0dc4 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Wed, 18 Sep 2024 06:39:44 -0700
Subject: [PATCH] [flang][lowering] delay stack save/restore emission in
 elemental calls

---
 flang/lib/Lower/ConvertCall.cpp               |  6 ++++-
 .../test/Lower/HLFIR/elemental-array-ops.f90  |  2 --
 .../elemental-user-procedure-stacksave.f90    | 22 +++++++++++++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Lower/HLFIR/elemental-user-procedure-stacksave.f90

diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 2fedc01bc77fc1..017bfd049d3dc5 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -366,7 +366,11 @@ std::pair<fir::ExtendedValue, bool> Fortran::lower::genCallOpAndResult(
       resultLengths = lengths;
     }
 
-    if (!extents.empty() || !lengths.empty()) {
+    if ((!extents.empty() || !lengths.empty()) && !isElemental) {
+      // Note: in the elemental context, the alloca ownership inside the
+      // elemental region is implicit, and a later pass (stack reclaim)
+      // will be in charge of emitting any stack save/restore, if needed,
+      // in the generated fir.do_loop.
       auto *bldr = &converter.getFirOpBuilder();
       mlir::Value sp = bldr->genStackSave(loc);
       stmtCtx.attachCleanup(
diff --git a/flang/test/Lower/HLFIR/elemental-array-ops.f90 b/flang/test/Lower/HLFIR/elemental-array-ops.f90
index 9929c17ec33994..18e1fb0a787e73 100644
--- a/flang/test/Lower/HLFIR/elemental-array-ops.f90
+++ b/flang/test/Lower/HLFIR/elemental-array-ops.f90
@@ -182,12 +182,10 @@ end subroutine char_return
 ! CHECK:             %[[VAL_23:.*]] = arith.constant 0 : index
 ! CHECK:             %[[VAL_24:.*]] = arith.cmpi sgt, %[[VAL_22]], %[[VAL_23]] : index
 ! CHECK:             %[[VAL_25:.*]] = arith.select %[[VAL_24]], %[[VAL_22]], %[[VAL_23]] : index
-! CHECK:             %[[VAL_26:.*]] = llvm.intr.stacksave : !llvm.ptr
 ! CHECK:             %[[VAL_27:.*]] = fir.call @_QPcallee(%[[VAL_2]], %[[VAL_25]], %[[VAL_20]]) fastmath<contract> : (!fir.ref<!fir.char<1,3>>, index, !fir.boxchar<1>) -> !fir.boxchar<1>
 ! CHECK:             %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_2]] typeparams %[[VAL_25]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.char<1,3>>, index) -> (!fir.ref<!fir.char<1,3>>, !fir.ref<!fir.char<1,3>>)
 ! CHECK:             %[[MustFree:.*]] = arith.constant false
 ! CHECK:             %[[ResultTemp:.*]] = hlfir.as_expr %[[VAL_28]]#0 move %[[MustFree]] : (!fir.ref<!fir.char<1,3>>, i1) -> !hlfir.expr<!fir.char<1,3>>
-! CHECK:             llvm.intr.stackrestore %[[VAL_26]] : !llvm.ptr
 ! CHECK:             hlfir.yield_element %[[ResultTemp]] : !hlfir.expr<!fir.char<1,3>>
 ! CHECK:           }
 ! CHECK:           %[[VAL_29:.*]] = arith.constant 0 : index
diff --git a/flang/test/Lower/HLFIR/elemental-user-procedure-stacksave.f90 b/flang/test/Lower/HLFIR/elemental-user-procedure-stacksave.f90
new file mode 100644
index 00000000000000..839342f2da6d85
--- /dev/null
+++ b/flang/test/Lower/HLFIR/elemental-user-procedure-stacksave.f90
@@ -0,0 +1,22 @@
+! Check that stack save and restore needed for elemental function result
+! allocation inside loops are not emitted directly in lowering, but inserted if
+! needed in the stack-reclaim pass.
+
+! RUN: %flang_fc1 -emit-hlfir %s -o - | FileCheck %s --check-prefix=CHECK-HLFIR
+! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LLVM
+subroutine foo(c1, c2)
+  character(*), dimension(100) :: c1, c2
+  interface
+    elemental pure function func(c)
+      character(*), intent(in) :: c
+      character(len(c)) :: func
+    end function
+  end interface
+  c1 = func(c2)
+end subroutine
+
+! CHECK-HLFIR-NOT: stacksave
+! CHECK-HLFIR: return
+
+! CHECK-LLVM: stacksave
+! CHECK-LLVM: stackrestore



More information about the flang-commits mailing list