[flang-commits] [flang] [flang, openacc] Limit operations hoisting from acc.loop. (PR #177727)

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Fri Jan 23 18:58:02 PST 2026


https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/177727

This patch implements `OperationMoveOpInterface::canMoveOutOf()`
method for `acc.loop`, such that even Pure operations are not hoisted
by LICM if any of their operands are referenced in the data operands
of `acc.loop`. Related to #175108.


>From 3c5c9e31bf58ada23536b8fccece18f25aadd17b Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 23 Jan 2026 18:00:07 -0800
Subject: [PATCH] [flang,openacc] Limit operations hoisting from acc.loop.

This patch implements `OperationMoveOpInterface::canMoveOutOf()`
method for `acc.loop`, such that even Pure operations are not hoisted
by LICM if any of their operands are referenced in the data operands
of `acc.loop`. Related to #175108.
---
 .../Support/FIROpenACCOpsInterfaces.cpp       | 21 ++++-
 flang/test/Transforms/licm.fir                | 88 +++++++++++++++++++
 2 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
index 4e48f16dd1144..55a5330dc9ba6 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
@@ -196,7 +196,7 @@ bool OperationMoveModel<mlir::acc::LoopOp>::canMoveFromDescendant(
 template <>
 bool OperationMoveModel<mlir::acc::LoopOp>::canMoveOutOf(
     mlir::Operation *op, mlir::Operation *candidate) const {
-  // TODO: disallow moving operations, which have operands that are referenced
+  // Disallow moving operations, which have operands that are referenced
   // in the data operands (e.g. in [first]private() etc.) of the acc.loop.
   // For example:
   //   %17 = acc.private var(%16 : !fir.box<!fir.array<?xf32>>)
@@ -205,9 +205,22 @@ bool OperationMoveModel<mlir::acc::LoopOp>::canMoveOutOf(
   //   }
   // We cannot hoist %19 without violating assumptions that OpenACC
   // transformations rely on.
-  //
-  // Always return false in the initial implementation.
-  return false;
+
+  // In general, some movement out of acc.loop is allowed,
+  // so return true if candidate is nullptr.
+  if (!candidate)
+    return true;
+
+  // Reject the candidate if it uses any data operand of the loop. Every
+  // data operand must be inspected, so only a match returns early.
+  auto loopOp = mlir::cast<mlir::acc::LoopOp>(op);
+  unsigned numDataOperands = loopOp.getNumDataOperands();
+  for (unsigned i = 0; i < numDataOperands; ++i) {
+    mlir::Value dataOperand = loopOp.getDataOperand(i);
+    if (llvm::is_contained(candidate->getOperands(), dataOperand))
+      return false;
+  }
+  return true;
 }
 
 } // namespace fir::acc
diff --git a/flang/test/Transforms/licm.fir b/flang/test/Transforms/licm.fir
index 56bf6fc1a2302..341fb9ddfade0 100644
--- a/flang/test/Transforms/licm.fir
+++ b/flang/test/Transforms/licm.fir
@@ -1660,6 +1660,7 @@ func.func @test_if_hoisting(%arg0: !fir.ref<!fir.array<?xi32>> {fir.bindc_name =
   return
 }
 
+// -----
 // Check that fir.box_addr applied to the private box is not hoisted
 // out of acc.loop. This breaks the assumptions taken by OpenACC
 // transformations that the results of acc.private operations,
@@ -1727,6 +1728,7 @@ func.func @_QPtest_acc_loop_with_private(%arg0: !fir.ref<f32> {fir.bindc_name =
   return
 }
 
+// -----
 // Test fir.slice and fir.rebox hoisting:
 // CHECK-LABEL:   func.func @_QPtest_slice_rebox_licm(
 // CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
@@ -1774,6 +1776,7 @@ func.func @_QPtest_slice_rebox_licm(%arg0: !fir.box<!fir.array<?xf32>> {fir.bind
 }
 func.func private @takes_assumed_shape(!fir.box<!fir.array<?xf32>>)
 
+// -----
 // Test fir.shape and fir.embox hoisting:
 // CHECK-LABEL:   func.func @_QPtest_shape_embox_licm(
 // CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
@@ -1826,6 +1829,7 @@ func.func @_QPtest_shape_embox_licm(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bind
   return
 }
 
+// -----
 // CHECK-LABEL:   func.func @test_shapeshift_licm(
 // CHECK-SAME:      %[[ARG0:.*]]: index) {
 // CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
@@ -1845,6 +1849,7 @@ func.func @test_shapeshift_licm(%arg0 : index) {
 }
 func.func private @takes_shape_shift(!fir.shapeshift<1>)
 
+// -----
 // CHECK-LABEL:   func.func @test_shift_licm(
 // CHECK-SAME:      %[[ARG0:.*]]: index) {
 // CHECK:           %[[CONSTANT_0:.*]] = arith.constant 1 : index
@@ -1864,6 +1869,7 @@ func.func @test_shift_licm(%arg0 : index) {
 }
 func.func private @takes_shift(!fir.shift<1>)
 
+// -----
 // Check that fir.box_addr of !fir.boxproc is not speculated.
 // For some reason, we do not produce proper [hl]fir.declare
 // for optional dummy procedure pointers.
@@ -1891,3 +1897,85 @@ func.func @_QPtest_box_addr_proc(%arg0: !fir.ref<f32> {fir.bindc_name = "x"}, %a
   fir.store %5 to %2 : !fir.ref<i32>
   return
 }
+
+// -----
+// CHECK-LABEL: func.func @test_acc_loop_private_hoisting(
+// CHECK: %[[CLAUSE_VAL:.*]] = acc.private
+// CHECK-NOT: %[[CLAUSE_VAL]]
+// CHECK: acc.loop{{.*}}private(%[[CLAUSE_VAL]] : !fir.ref<f32>)
+func.func @test_acc_loop_private_hoisting() {
+  %cst = arith.constant 1.000000e+00 : f32
+  %c10_i32 = arith.constant 10 : i32
+  %c1_i32 = arith.constant 1 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.alloca f32 {bindc_name = "b", uniq_name = "_QFtestEb"}
+  %2 = fir.declare %1 {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+  acc.parallel combined(loop) {
+    %5 = acc.private varPtr(%2 : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "b"}
+    acc.loop combined(parallel) private(%5 : !fir.ref<f32>) control(%arg0 : i32) = (%c1_i32 : i32) to (%c10_i32 : i32)  step (%c1_i32 : i32) {
+      %cvt = fir.convert %5 : (!fir.ref<f32>) -> !fir.ref<f32>
+      %7 = fir.declare %cvt {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+      fir.store %cst to %7 : !fir.ref<f32>
+      acc.yield
+    } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+    acc.yield
+  }
+  return
+}
+
+// -----
+// CHECK-LABEL: func.func @test_acc_loop_firstprivate_hoisting(
+// CHECK: %[[CLAUSE_VAL:.*]] = acc.firstprivate
+// CHECK-NOT: %[[CLAUSE_VAL]]
+// CHECK: acc.loop{{.*}}firstprivate(%[[CLAUSE_VAL]] : !fir.ref<f32>)
+func.func @test_acc_loop_firstprivate_hoisting() {
+  %cst = arith.constant 1.000000e+00 : f32
+  %c10_i32 = arith.constant 10 : i32
+  %c1_i32 = arith.constant 1 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.alloca f32 {bindc_name = "b", uniq_name = "_QFtestEb"}
+  %2 = fir.declare %1 {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+  acc.parallel combined(loop) {
+    %5 = acc.firstprivate varPtr(%2 : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "b"}
+    acc.loop combined(parallel) firstprivate(%5 : !fir.ref<f32>) control(%arg0 : i32) = (%c1_i32 : i32) to (%c10_i32 : i32)  step (%c1_i32 : i32) {
+      %cvt = fir.convert %5 : (!fir.ref<f32>) -> !fir.ref<f32>
+      %7 = fir.declare %cvt {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+      fir.store %cst to %7 : !fir.ref<f32>
+      acc.yield
+    } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+    acc.yield
+  }
+  return
+}
+
+// -----
+// CHECK-LABEL: func.func @test_acc_loop_reduction_hoisting(
+// CHECK: %[[CLAUSE_VAL:.*]] = acc.reduction
+// CHECK-NOT: %[[CLAUSE_VAL]]
+// CHECK: acc.loop{{.*}}reduction(%[[CLAUSE_VAL]] : !fir.ref<f32>)
+func.func @test_acc_loop_reduction_hoisting() {
+  %cst = arith.constant 1.000000e+00 : f32
+  %c10_i32 = arith.constant 10 : i32
+  %c1_i32 = arith.constant 1 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.alloca f32 {bindc_name = "b", uniq_name = "_QFtestEb"}
+  %2 = fir.declare %1 {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+  acc.parallel combined(loop) {
+  %5 = acc.reduction varPtr(%2 : !fir.ref<f32>) recipe(@reduction_add_ref_f32) -> !fir.ref<f32> {name = "b"}
+    acc.loop combined(parallel) reduction(%5 : !fir.ref<f32>) control(%arg0 : i32) = (%c1_i32 : i32) to (%c10_i32 : i32)  step (%c1_i32 : i32) {
+      %cvt = fir.convert %5 : (!fir.ref<f32>) -> !fir.ref<f32>
+      %7 = fir.declare %cvt {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+      fir.store %cst to %7 : !fir.ref<f32>
+      acc.yield
+    } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+    acc.yield
+  }
+  return
+}
+acc.reduction.recipe @reduction_add_ref_f32 : !fir.ref<f32> reduction_operator <add> init {
+^bb0(%arg0: !fir.ref<f32>):
+  acc.yield %arg0 : !fir.ref<f32>
+} combiner {
+^bb0(%arg0: !fir.ref<f32>, %arg1: !fir.ref<f32>):
+  acc.yield %arg0 : !fir.ref<f32>
+}



More information about the flang-commits mailing list