[flang-commits] [flang] [flang, openacc] Limit operations hoisting from acc.loop. (PR #177727)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Fri Jan 23 18:58:02 PST 2026
https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/177727
This patch implements `OperationMoveOpInterface::canMoveOutOf()`
method for `acc.loop`, such that even Pure operations are not hoisted
by LICM if any of their operands are referenced in the data operands
of `acc.loop`. Related to #175108.
From 3c5c9e31bf58ada23536b8fccece18f25aadd17b Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 23 Jan 2026 18:00:07 -0800
Subject: [PATCH] [flang,openacc] Limit operations hoisting from acc.loop.
This patch implements `OperationMoveOpInterface::canMoveOutOf()`
method for `acc.loop`, such that even Pure operations are not hoisted
by LICM if any of their operands are referenced in the data operands
of `acc.loop`. Related to #175108.
---
.../Support/FIROpenACCOpsInterfaces.cpp | 21 ++++-
flang/test/Transforms/licm.fir | 88 +++++++++++++++++++
2 files changed, 105 insertions(+), 4 deletions(-)
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
index 4e48f16dd1144..55a5330dc9ba6 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
@@ -196,7 +196,7 @@ bool OperationMoveModel<mlir::acc::LoopOp>::canMoveFromDescendant(
template <>
bool OperationMoveModel<mlir::acc::LoopOp>::canMoveOutOf(
mlir::Operation *op, mlir::Operation *candidate) const {
- // TODO: disallow moving operations, which have operands that are referenced
+ // Disallow moving operations, which have operands that are referenced
// in the data operands (e.g. in [first]private() etc.) of the acc.loop.
// For example:
// %17 = acc.private var(%16 : !fir.box<!fir.array<?xf32>>)
@@ -205,9 +205,22 @@ bool OperationMoveModel<mlir::acc::LoopOp>::canMoveOutOf(
// }
// We cannot hoist %19 without violating assumptions that OpenACC
// transformations rely on.
- //
- // Always return false in the initial implementation.
- return false;
+
+ // In general, some movement out of acc.loop is allowed,
+ // so return true if candidate is nullptr.
+ if (!candidate)
+ return true;
+
+ // Refuse the move as soon as the candidate uses any data operand of the
+ // loop; a mismatch on one data operand must not end the scan early.
+ auto loopOp = mlir::cast<mlir::acc::LoopOp>(op);
+ unsigned numDataOperands = loopOp.getNumDataOperands();
+ for (unsigned i = 0; i < numDataOperands; ++i) {
+ mlir::Value dataOperand = loopOp.getDataOperand(i);
+ if (llvm::is_contained(candidate->getOperands(), dataOperand))
+ return false;
+ }
+ return true;
}
} // namespace fir::acc
diff --git a/flang/test/Transforms/licm.fir b/flang/test/Transforms/licm.fir
index 56bf6fc1a2302..341fb9ddfade0 100644
--- a/flang/test/Transforms/licm.fir
+++ b/flang/test/Transforms/licm.fir
@@ -1660,6 +1660,7 @@ func.func @test_if_hoisting(%arg0: !fir.ref<!fir.array<?xi32>> {fir.bindc_name =
return
}
+// -----
// Check that fir.box_addr applied to the private box is not hoisted
// out of acc.loop. This breaks the assumptions taken by OpenACC
// transformations that the results of acc.private operations,
@@ -1727,6 +1728,7 @@ func.func @_QPtest_acc_loop_with_private(%arg0: !fir.ref<f32> {fir.bindc_name =
return
}
+// -----
// Test fir.slice and fir.rebox hoisting:
// CHECK-LABEL: func.func @_QPtest_slice_rebox_licm(
// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
@@ -1774,6 +1776,7 @@ func.func @_QPtest_slice_rebox_licm(%arg0: !fir.box<!fir.array<?xf32>> {fir.bind
}
func.func private @takes_assumed_shape(!fir.box<!fir.array<?xf32>>)
+// -----
// Test fir.shape and fir.embox hoisting:
// CHECK-LABEL: func.func @_QPtest_shape_embox_licm(
// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
@@ -1826,6 +1829,7 @@ func.func @_QPtest_shape_embox_licm(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bind
return
}
+// -----
// CHECK-LABEL: func.func @test_shapeshift_licm(
// CHECK-SAME: %[[ARG0:.*]]: index) {
// CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
@@ -1845,6 +1849,7 @@ func.func @test_shapeshift_licm(%arg0 : index) {
}
func.func private @takes_shape_shift(!fir.shapeshift<1>)
+// -----
// CHECK-LABEL: func.func @test_shift_licm(
// CHECK-SAME: %[[ARG0:.*]]: index) {
// CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
@@ -1864,6 +1869,7 @@ func.func @test_shift_licm(%arg0 : index) {
}
func.func private @takes_shift(!fir.shift<1>)
+// -----
// Check that fir.box_addr of !fir.boxproc is not speculated.
// For some reason, we do not produce proper [hl]fir.declare
// for optional dummy procedure pointers.
@@ -1891,3 +1897,85 @@ func.func @_QPtest_box_addr_proc(%arg0: !fir.ref<f32> {fir.bindc_name = "x"}, %a
fir.store %5 to %2 : !fir.ref<i32>
return
}
+
+// -----
+// CHECK-LABEL: func.func @test_acc_loop_private_hoisting(
+// CHECK: %[[CLAUSE_VAL:.*]] = acc.private
+// CHECK-NOT: %[[CLAUSE_VAL]]
+// CHECK: acc.loop{{.*}}private(%[[CLAUSE_VAL]] : !fir.ref<f32>)
+func.func @test_acc_loop_private_hoisting() {
+ %cst = arith.constant 1.000000e+00 : f32
+ %c10_i32 = arith.constant 10 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca f32 {bindc_name = "b", uniq_name = "_QFtestEb"}
+ %2 = fir.declare %1 {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ acc.parallel combined(loop) {
+ %5 = acc.private varPtr(%2 : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "b"}
+ acc.loop combined(parallel) private(%5 : !fir.ref<f32>) control(%arg0 : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+ %cvt = fir.convert %5 : (!fir.ref<f32>) -> !fir.ref<f32>
+ %7 = fir.declare %cvt {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ fir.store %cst to %7 : !fir.ref<f32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ return
+}
+
+// -----
+// CHECK-LABEL: func.func @test_acc_loop_firstprivate_hoisting(
+// CHECK: %[[CLAUSE_VAL:.*]] = acc.firstprivate
+// CHECK-NOT: %[[CLAUSE_VAL]]
+// CHECK: acc.loop{{.*}}firstprivate(%[[CLAUSE_VAL]] : !fir.ref<f32>)
+func.func @test_acc_loop_firstprivate_hoisting() {
+ %cst = arith.constant 1.000000e+00 : f32
+ %c10_i32 = arith.constant 10 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca f32 {bindc_name = "b", uniq_name = "_QFtestEb"}
+ %2 = fir.declare %1 {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ acc.parallel combined(loop) {
+ %5 = acc.firstprivate varPtr(%2 : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "b"}
+ acc.loop combined(parallel) firstprivate(%5 : !fir.ref<f32>) control(%arg0 : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+ %cvt = fir.convert %5 : (!fir.ref<f32>) -> !fir.ref<f32>
+ %7 = fir.declare %cvt {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ fir.store %cst to %7 : !fir.ref<f32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ return
+}
+
+// -----
+// CHECK-LABEL: func.func @test_acc_loop_reduction_hoisting(
+// CHECK: %[[CLAUSE_VAL:.*]] = acc.reduction
+// CHECK-NOT: %[[CLAUSE_VAL]]
+// CHECK: acc.loop{{.*}}reduction(%[[CLAUSE_VAL]] : !fir.ref<f32>)
+func.func @test_acc_loop_reduction_hoisting() {
+ %cst = arith.constant 1.000000e+00 : f32
+ %c10_i32 = arith.constant 10 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca f32 {bindc_name = "b", uniq_name = "_QFtestEb"}
+ %2 = fir.declare %1 {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ acc.parallel combined(loop) {
+ %5 = acc.reduction varPtr(%2 : !fir.ref<f32>) recipe(@reduction_add_ref_f32) -> !fir.ref<f32> {name = "b"}
+ acc.loop combined(parallel) reduction(%5 : !fir.ref<f32>) control(%arg0 : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+ %cvt = fir.convert %5 : (!fir.ref<f32>) -> !fir.ref<f32>
+ %7 = fir.declare %cvt {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ fir.store %cst to %7 : !fir.ref<f32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ return
+}
+acc.reduction.recipe @reduction_add_ref_f32 : !fir.ref<f32> reduction_operator <add> init {
+^bb0(%arg0: !fir.ref<f32>):
+ acc.yield %arg0 : !fir.ref<f32>
+} combiner {
+^bb0(%arg0: !fir.ref<f32>, %arg1: !fir.ref<f32>):
+ acc.yield %arg0 : !fir.ref<f32>
+}
More information about the flang-commits
mailing list