[flang-commits] [flang] [flang] Relax conflicts detection in ElementalAssignBufferization. (PR #143045)

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Thu Jun 5 16:10:11 PDT 2025


https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/143045

If there is a read-effect operation inside `hlfir.elemental`,
there is no reason to block moving it to the assignment point
unless there are write-effect operations between the elemental
and the assignment. The previous code was disallowing the optimization
even if there were only read-effect operations in between.

This case is from 465.tonto, though this change does not improve
performance at all.


>From cca08523984f96b05ba6f2317d8157f0cb82b339 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Thu, 5 Jun 2025 15:33:59 -0700
Subject: [PATCH] [flang] Relax conflicts detection in
 ElementalAssignBufferization.

If there is a read-effect operation inside `hlfir.elemental`,
there is no reason to block moving it to the assignment point
unless there are write-effect operations between the elemental
and the assignment. The previous code was disallowing the optimization
even if there were only read-effect operations in between.

This case is from 465.tonto, though this change does not improve
performance at all.
---
 .../Transforms/OptimizedBufferization.cpp     | 30 +++++++---
 flang/test/HLFIR/opt-bufferization-tonto.fir  | 59 +++++++++++++++++++
 2 files changed, 80 insertions(+), 9 deletions(-)
 create mode 100644 flang/test/HLFIR/opt-bufferization-tonto.fir

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index e2ca754a1817a..1af6e014a8187 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -572,13 +572,11 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
   // hlfir.assign, check there are no effects which make this unsafe
 
   // keep track of any values written to in the elemental, as these can't be
-  // read from between the elemental and the assignment
+  // read from or written to between the elemental and the assignment
+  mlir::SmallVector<mlir::Value, 1> notToBeAccessedBeforeAssign;
   // likewise, values read in the elemental cannot be written to between the
   // elemental and the assign
-  mlir::SmallVector<mlir::Value, 1> notToBeAccessedBeforeAssign;
-  // any accesses to the array between the array and the assignment means it
-  // would be unsafe to move the elemental to the assignment
-  notToBeAccessedBeforeAssign.push_back(match.array);
+  mlir::SmallVector<mlir::Value, 1> notToBeWrittenBeforeAssign;
 
   // 1) side effects in the elemental body - it isn't sufficient to just look
   // for ordered elementals because we also cannot support out of order reads
@@ -593,10 +591,12 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
   for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
     mlir::AliasResult res = containsReadOrWriteEffectOn(effect, match.array);
     if (res.isNo()) {
-      if (mlir::isa<mlir::MemoryEffects::Write, mlir::MemoryEffects::Read>(
-              effect.getEffect()))
-        if (effect.getValue())
+      if (effect.getValue()) {
+        if (mlir::isa<mlir::MemoryEffects::Write>(effect.getEffect()))
           notToBeAccessedBeforeAssign.push_back(effect.getValue());
+        else if (mlir::isa<mlir::MemoryEffects::Read>(effect.getEffect()))
+          notToBeWrittenBeforeAssign.push_back(effect.getValue());
+      }
 
       // this is safe in the elemental
       continue;
@@ -669,11 +669,23 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
       mlir::AliasResult res = containsReadOrWriteEffectOn(effect, val);
       if (!res.isNo()) {
         LLVM_DEBUG(llvm::dbgs()
-                   << "diasllowed side-effect: " << effect.getValue() << " for "
+                   << "disallowed side-effect: " << effect.getValue() << " for "
                    << elemental.getLoc() << "\n");
         return std::nullopt;
       }
     }
+    // Anything that is read inside the elemental can only be safely read
+    // between the elemental and the assignment.
+    for (mlir::Value val : notToBeWrittenBeforeAssign) {
+      mlir::AliasResult res = containsReadOrWriteEffectOn(effect, val);
+      if (!res.isNo() &&
+          !mlir::isa<mlir::MemoryEffects::Read>(effect.getEffect())) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "disallowed non-read side-effect: " << effect.getValue()
+                   << " for " << elemental.getLoc() << "\n");
+        return std::nullopt;
+      }
+    }
   }
 
   return match;
diff --git a/flang/test/HLFIR/opt-bufferization-tonto.fir b/flang/test/HLFIR/opt-bufferization-tonto.fir
new file mode 100644
index 0000000000000..df30a90bc6ecf
--- /dev/null
+++ b/flang/test/HLFIR/opt-bufferization-tonto.fir
@@ -0,0 +1,59 @@
+// RUN: fir-opt --opt-bufferization %s | FileCheck %s
+
+// tonto case where optimized bufferization conservatively
+// did not pull elemental into the assignment due to
+// the designate op in between:
+// module test
+//   type my_type
+//      real, dimension(:,:), pointer :: p => null()
+//   end type my_type
+// contains
+//   subroutine assign1(self, i)
+//     type(my_type) :: self
+//     real, dimension(3) :: ct, q
+//     integer :: i
+//     self%p(:,i) = q - ct
+//   end subroutine assign1
+// end module test
+
+// CHECK-LABEL:   func.func @_QMtestPassign1(
+// CHECK-NOT: hlfir.elemental
+// CHECK-NOT: hlfir.assign{{.*}}array
+func.func @_QMtestPassign1(%arg0: !fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>> {fir.bindc_name = "self"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}) {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %c3 = arith.constant 3 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.alloca !fir.array<3xf32> {bindc_name = "ct", uniq_name = "_QMtestFassign1Ect"}
+  %2 = fir.shape %c3 : (index) -> !fir.shape<1>
+  %3:2 = hlfir.declare %1(%2) {uniq_name = "_QMtestFassign1Ect"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
+  %4:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QMtestFassign1Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %5 = fir.alloca !fir.array<3xf32> {bindc_name = "q", uniq_name = "_QMtestFassign1Eq"}
+  %6:2 = hlfir.declare %5(%2) {uniq_name = "_QMtestFassign1Eq"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
+  %7:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QMtestFassign1Eself"} : (!fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>>, !fir.dscope) -> (!fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>>, !fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>>)
+  %8 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
+  ^bb0(%arg2: index):
+    %22 = hlfir.designate %6#0 (%arg2)  : (!fir.ref<!fir.array<3xf32>>, index) -> !fir.ref<f32>
+    %23 = hlfir.designate %3#0 (%arg2)  : (!fir.ref<!fir.array<3xf32>>, index) -> !fir.ref<f32>
+    %24 = fir.load %22 : !fir.ref<f32>
+    %25 = fir.load %23 : !fir.ref<f32>
+    %26 = arith.subf %24, %25 fastmath<contract> : f32
+    hlfir.yield_element %26 : f32
+  }
+  %9 = hlfir.designate %7#0{"p"}   {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+  %10 = fir.load %9 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+  %11:3 = fir.box_dims %10, %c0 : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index) -> (index, index, index)
+  %12 = arith.addi %11#0, %11#1 : index
+  %13 = arith.subi %12, %c1 : index
+  %14 = arith.subi %13, %11#0 : index
+  %15 = arith.addi %14, %c1 : index
+  %16 = arith.cmpi sgt, %15, %c0 : index
+  %17 = arith.select %16, %15, %c0 : index
+  %18 = fir.load %4#0 : !fir.ref<i32>
+  %19 = fir.convert %18 : (i32) -> i64
+  %20 = fir.shape %17 : (index) -> !fir.shape<1>
+  %21 = hlfir.designate %10 (%11#0:%13:%c1, %19)  shape %20 : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+  hlfir.assign %8 to %21 : !hlfir.expr<3xf32>, !fir.box<!fir.array<?xf32>>
+  hlfir.destroy %8 : !hlfir.expr<3xf32>
+  return
+}



More information about the flang-commits mailing list