[flang-commits] [flang] b838695 - [flang][OpenACC] Limit hoisting out of compute operations. (#193099)
via flang-commits
flang-commits at lists.llvm.org
Tue Apr 21 08:53:12 PDT 2026
Author: Slava Zakharin
Date: 2026-04-21T08:53:06-07:00
New Revision: b8386954ac11c17a439dcc495bbe147375268201
URL: https://github.com/llvm/llvm-project/commit/b8386954ac11c17a439dcc495bbe147375268201
DIFF: https://github.com/llvm/llvm-project/commit/b8386954ac11c17a439dcc495bbe147375268201.diff
LOG: [flang][OpenACC] Limit hoisting out of compute operations. (#193099)
When a compute operation is nested inside a loop-like operation,
LICM for nested regions may hoist operations that are illegal to hoist;
e.g., operations with reference operands/results should not, in general,
be hoisted. This patch limits hoisting out of kernels, parallel and
serial operations.
Added:
flang/test/Transforms/OpenACC/acc-compute-region-licm.fir
Modified:
flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
index 6d2c6ea5c8e57..34395fdde1d25 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
@@ -272,4 +272,54 @@ bool OperationMoveModel<mlir::acc::LoopOp>::canMoveOutOf(
return true;
}
+// Return true iff 'candidate' may be hoisted out of 'op', an OpenACC compute
+// operation (instantiated below for KernelsOp, ParallelOp and SerialOp).
+template <typename Op>
+bool OperationMoveModel<Op>::canMoveOutOf(mlir::Operation *op,
+ mlir::Operation *candidate) const {
+ // A null candidate is answered with true, because some movement out of
+ // compute operations is allowed in general.
+ if (!candidate)
+ return true;
+
+ // Permit hoisting only if all operand and result types are trivial.
+ return llvm::all_of(candidate->getOperands(),
+ [](mlir::Value operand) {
+ return fir::isa_trivial(operand.getType());
+ }) &&
+ llvm::all_of(candidate->getResults(), [](mlir::Value result) {
+ return fir::isa_trivial(result.getType());
+ });
+}
+
+template <>
+bool OperationMoveModel<mlir::acc::KernelsOp>::canMoveFromDescendant(
+ mlir::Operation *op, mlir::Operation *descendant,
+ mlir::Operation *candidate) const {
+ return true;
+}
+
+template bool OperationMoveModel<mlir::acc::KernelsOp>::canMoveOutOf(
+ mlir::Operation *op, mlir::Operation *candidate) const;
+
+template <>
+bool OperationMoveModel<mlir::acc::ParallelOp>::canMoveFromDescendant(
+ mlir::Operation *op, mlir::Operation *descendant,
+ mlir::Operation *candidate) const {
+ return true;
+}
+
+template bool OperationMoveModel<mlir::acc::ParallelOp>::canMoveOutOf(
+ mlir::Operation *op, mlir::Operation *candidate) const;
+
+template <>
+bool OperationMoveModel<mlir::acc::SerialOp>::canMoveFromDescendant(
+ mlir::Operation *op, mlir::Operation *descendant,
+ mlir::Operation *candidate) const {
+ return true;
+}
+
+template bool OperationMoveModel<mlir::acc::SerialOp>::canMoveOutOf(
+ mlir::Operation *op, mlir::Operation *candidate) const;
+
} // namespace fir::acc
diff --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index f2fa5bf38872d..7808972033c22 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -106,6 +106,12 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
mlir::acc::OpenACCDialect *dialect) {
mlir::acc::LoopOp::attachInterface<OperationMoveModel<mlir::acc::LoopOp>>(
*ctx);
+ mlir::acc::KernelsOp::attachInterface<
+ OperationMoveModel<mlir::acc::KernelsOp>>(*ctx);
+ mlir::acc::ParallelOp::attachInterface<
+ OperationMoveModel<mlir::acc::ParallelOp>>(*ctx);
+ mlir::acc::SerialOp::attachInterface<
+ OperationMoveModel<mlir::acc::SerialOp>>(*ctx);
mlir::acc::ReductionInitOp::attachInterface<
fir::acc::ReductionInitOpFortranObjectViewModel>(*ctx);
});
diff --git a/flang/test/Transforms/OpenACC/acc-compute-region-licm.fir b/flang/test/Transforms/OpenACC/acc-compute-region-licm.fir
new file mode 100644
index 0000000000000..58a0bd5fd9edf
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/acc-compute-region-licm.fir
@@ -0,0 +1,146 @@
+// RUN: fir-opt -flang-licm --split-input-file %s | FileCheck %s
+
+// Test that canMoveOutOf for acc.parallel allows hoisting of operations
+// with trivial type operands and results, and prevents hoisting of operations
+// with non-trivial types out of the compute region.
+//
+// acc.loop LICM hoists both invariant fir.convert ops into acc.parallel.
+// scf.for nested LICM then hoists only the trivial one out of acc.parallel.
+//
+// CHECK-LABEL: func.func @test_parallel_canMoveOutOf(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK: %[[CVT_TRIVIAL:.*]] = fir.convert %{{.*}} : (i32) -> i64
+// CHECK: scf.for
+// CHECK: acc.parallel {
+// CHECK: fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: acc.loop
+func.func @test_parallel_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %c1_i32 = arith.constant 1 : i32
+ %c10_i32 = arith.constant 10 : i32
+ %c5_i32 = arith.constant 5 : i32
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ acc.parallel {
+ acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+ %cvt_trivial = fir.convert %c5_i32 : (i32) -> i64
+ %cvt_ref = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ %idx = fir.convert %iv : (i32) -> index
+ memref.store %cst, %cvt_ref[%idx] : memref<10xf32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ }
+ return
+}
+
+// -----
+
+// Same test for acc.kernels.
+// CHECK-LABEL: func.func @test_kernels_canMoveOutOf(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK: %[[CVT_TRIVIAL:.*]] = fir.convert %{{.*}} : (i32) -> i64
+// CHECK: scf.for
+// CHECK: acc.kernels {
+// CHECK: fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: acc.loop
+func.func @test_kernels_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %c1_i32 = arith.constant 1 : i32
+ %c10_i32 = arith.constant 10 : i32
+ %c5_i32 = arith.constant 5 : i32
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ acc.kernels {
+ acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+ %cvt_trivial = fir.convert %c5_i32 : (i32) -> i64
+ %cvt_ref = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ %idx = fir.convert %iv : (i32) -> index
+ memref.store %cst, %cvt_ref[%idx] : memref<10xf32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.terminator
+ }
+ }
+ return
+}
+
+// -----
+
+// Same test for acc.serial.
+// CHECK-LABEL: func.func @test_serial_canMoveOutOf(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK: %[[CVT_TRIVIAL:.*]] = fir.convert %{{.*}} : (i32) -> i64
+// CHECK: scf.for
+// CHECK: acc.serial {
+// CHECK: fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: acc.loop
+func.func @test_serial_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %c1_i32 = arith.constant 1 : i32
+ %c10_i32 = arith.constant 10 : i32
+ %c5_i32 = arith.constant 5 : i32
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ acc.serial {
+ acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+ %cvt_trivial = fir.convert %c5_i32 : (i32) -> i64
+ %cvt_ref = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ %idx = fir.convert %iv : (i32) -> index
+ memref.store %cst, %cvt_ref[%idx] : memref<10xf32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ }
+ return
+}
+
+// -----
+
+// Test transitive canMoveOutOf: scf.if containing a fir.convert with
+// non-trivial operand must NOT be hoisted out of acc.parallel as a whole,
+// even though scf.if itself yields a trivial type (i64).
+// CHECK-LABEL: func.func @test_parallel_transitive_canMoveOutOf(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>,
+// CHECK-SAME: %[[COND:.*]]: i1)
+// CHECK: scf.for
+// CHECK: acc.parallel {
+// scf.if is hoisted from acc.loop into acc.parallel, but NOT out of
+// acc.parallel because nested fir.convert has non-trivial operand type:
+// CHECK: %[[ADDR:.*]] = scf.if %[[COND]] -> (i64)
+// CHECK: fir.convert %[[ARG0]]
+// CHECK: acc.loop
+func.func @test_parallel_transitive_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>, %cond: i1) {
+ %c0_i64 = arith.constant 0 : i64
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %c1_i32 = arith.constant 1 : i32
+ %c10_i32 = arith.constant 10 : i32
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ acc.parallel {
+ acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+ %addr = scf.if %cond -> (i64) {
+ %ptr = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> i64
+ scf.yield %ptr : i64
+ } else {
+ scf.yield %c0_i64 : i64
+ }
+ %ref = fir.convert %addr : (i64) -> !fir.ref<f32>
+ fir.store %cst to %ref : !fir.ref<f32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ }
+ return
+}
More information about the flang-commits
mailing list