[flang-commits] [flang] b838695 - [flang][OpenACC] Limit hoisting out of compute operations. (#193099)

via flang-commits flang-commits at lists.llvm.org
Tue Apr 21 08:53:12 PDT 2026


Author: Slava Zakharin
Date: 2026-04-21T08:53:06-07:00
New Revision: b8386954ac11c17a439dcc495bbe147375268201

URL: https://github.com/llvm/llvm-project/commit/b8386954ac11c17a439dcc495bbe147375268201
DIFF: https://github.com/llvm/llvm-project/commit/b8386954ac11c17a439dcc495bbe147375268201.diff

LOG: [flang][OpenACC] Limit hoisting out of compute operations. (#193099)

When a compute operation is nested inside a loop-like operation,
LICM for nested regions may hoist operations that are illegal to hoist;
e.g., operations with reference operands/results should not, in general,
be hoisted. This patch limits the hoisting for acc.kernels, acc.parallel
and acc.serial.

Added: 
    flang/test/Transforms/OpenACC/acc-compute-region-licm.fir

Modified: 
    flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
    flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
index 6d2c6ea5c8e57..34395fdde1d25 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.cpp
@@ -272,4 +272,54 @@ bool OperationMoveModel<mlir::acc::LoopOp>::canMoveOutOf(
   return true;
 }
 
+// Return true iff 'candidate' may be hoisted out of the OpenACC compute
+// operation 'op'; instantiated below for kernels, parallel and serial.
+template <typename Op>
+bool OperationMoveModel<Op>::canMoveOutOf(mlir::Operation *op,
+                                          mlir::Operation *candidate) const {
+  // With no specific candidate (nullptr), answer in general: some movement
+  // out of compute operations is allowed, so return true.
+  if (!candidate)
+    return true;
+
+  // Hoist only if all of the candidate's own operand/result types are trivial.
+  return llvm::all_of(candidate->getOperands(),
+                      [](mlir::Value operand) {
+                        return fir::isa_trivial(operand.getType());
+                      }) &&
+         llvm::all_of(candidate->getResults(), [](mlir::Value result) {
+           return fir::isa_trivial(result.getType());
+         });
+}
+
+template <>
+bool OperationMoveModel<mlir::acc::KernelsOp>::canMoveFromDescendant(
+    mlir::Operation *op, mlir::Operation *descendant,
+    mlir::Operation *candidate) const {
+  return true;
+}
+
+template bool OperationMoveModel<mlir::acc::KernelsOp>::canMoveOutOf(
+    mlir::Operation *op, mlir::Operation *candidate) const;
+
+template <>
+bool OperationMoveModel<mlir::acc::ParallelOp>::canMoveFromDescendant(
+    mlir::Operation *op, mlir::Operation *descendant,
+    mlir::Operation *candidate) const {
+  return true;
+}
+
+template bool OperationMoveModel<mlir::acc::ParallelOp>::canMoveOutOf(
+    mlir::Operation *op, mlir::Operation *candidate) const;
+
+template <>
+bool OperationMoveModel<mlir::acc::SerialOp>::canMoveFromDescendant(
+    mlir::Operation *op, mlir::Operation *descendant,
+    mlir::Operation *candidate) const {
+  return true;
+}
+
+template bool OperationMoveModel<mlir::acc::SerialOp>::canMoveOutOf(
+    mlir::Operation *op, mlir::Operation *candidate) const;
+
 } // namespace fir::acc

diff  --git a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
index f2fa5bf38872d..7808972033c22 100644
--- a/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/RegisterOpenACCExtensions.cpp
@@ -106,6 +106,12 @@ void registerOpenACCExtensions(mlir::DialectRegistry &registry) {
                             mlir::acc::OpenACCDialect *dialect) {
     mlir::acc::LoopOp::attachInterface<OperationMoveModel<mlir::acc::LoopOp>>(
         *ctx);
+    mlir::acc::KernelsOp::attachInterface<
+        OperationMoveModel<mlir::acc::KernelsOp>>(*ctx);
+    mlir::acc::ParallelOp::attachInterface<
+        OperationMoveModel<mlir::acc::ParallelOp>>(*ctx);
+    mlir::acc::SerialOp::attachInterface<
+        OperationMoveModel<mlir::acc::SerialOp>>(*ctx);
     mlir::acc::ReductionInitOp::attachInterface<
         fir::acc::ReductionInitOpFortranObjectViewModel>(*ctx);
   });

diff  --git a/flang/test/Transforms/OpenACC/acc-compute-region-licm.fir b/flang/test/Transforms/OpenACC/acc-compute-region-licm.fir
new file mode 100644
index 0000000000000..58a0bd5fd9edf
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/acc-compute-region-licm.fir
@@ -0,0 +1,146 @@
+// RUN: fir-opt -flang-licm --split-input-file %s | FileCheck %s
+
+// Test that canMoveOutOf for acc.parallel allows hoisting of operations
+// whose operands and results all have trivial types, and prevents hoisting
+// of operations with non-trivial types out of the compute region.
+//
+// acc.loop LICM hoists both invariant fir.convert ops into acc.parallel;
+// scf.for nested LICM then hoists only the trivial (i32 -> i64) one out.
+//
+// CHECK-LABEL:   func.func @test_parallel_canMoveOutOf(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK:         %[[CVT_TRIVIAL:.*]] = fir.convert %{{.*}} : (i32) -> i64
+// CHECK:         scf.for
+// CHECK:           acc.parallel {
+// CHECK:             fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK:             acc.loop
+func.func @test_parallel_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %c1_i32 = arith.constant 1 : i32
+  %c10_i32 = arith.constant 10 : i32
+  %c5_i32 = arith.constant 5 : i32
+  %cst = arith.constant 1.000000e+00 : f32
+  scf.for %i = %c0 to %c10 step %c1 {
+    acc.parallel {
+      acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+        %cvt_trivial = fir.convert %c5_i32 : (i32) -> i64
+        %cvt_ref = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+        %idx = fir.convert %iv : (i32) -> index
+        memref.store %cst, %cvt_ref[%idx] : memref<10xf32>
+        acc.yield
+      } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+      acc.yield
+    }
+  }
+  return
+}
+
+// -----
+
+// Same test as @test_parallel_canMoveOutOf, but for acc.kernels.
+// CHECK-LABEL:   func.func @test_kernels_canMoveOutOf(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK:         %[[CVT_TRIVIAL:.*]] = fir.convert %{{.*}} : (i32) -> i64
+// CHECK:         scf.for
+// CHECK:           acc.kernels {
+// CHECK:             fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK:             acc.loop
+func.func @test_kernels_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %c1_i32 = arith.constant 1 : i32
+  %c10_i32 = arith.constant 10 : i32
+  %c5_i32 = arith.constant 5 : i32
+  %cst = arith.constant 1.000000e+00 : f32
+  scf.for %i = %c0 to %c10 step %c1 {
+    acc.kernels {
+      acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+        %cvt_trivial = fir.convert %c5_i32 : (i32) -> i64
+        %cvt_ref = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+        %idx = fir.convert %iv : (i32) -> index
+        memref.store %cst, %cvt_ref[%idx] : memref<10xf32>
+        acc.yield
+      } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+      acc.terminator
+    }
+  }
+  return
+}
+
+// -----
+
+// Same test as @test_parallel_canMoveOutOf, but for acc.serial.
+// CHECK-LABEL:   func.func @test_serial_canMoveOutOf(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK:         %[[CVT_TRIVIAL:.*]] = fir.convert %{{.*}} : (i32) -> i64
+// CHECK:         scf.for
+// CHECK:           acc.serial {
+// CHECK:             fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK:             acc.loop
+func.func @test_serial_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %c1_i32 = arith.constant 1 : i32
+  %c10_i32 = arith.constant 10 : i32
+  %c5_i32 = arith.constant 5 : i32
+  %cst = arith.constant 1.000000e+00 : f32
+  scf.for %i = %c0 to %c10 step %c1 {
+    acc.serial {
+      acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+        %cvt_trivial = fir.convert %c5_i32 : (i32) -> i64
+        %cvt_ref = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+        %idx = fir.convert %iv : (i32) -> index
+        memref.store %cst, %cvt_ref[%idx] : memref<10xf32>
+        acc.yield
+      } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+      acc.yield
+    }
+  }
+  return
+}
+
+// -----
+
+// Test transitive canMoveOutOf: an scf.if containing a fir.convert with a
+// non-trivial operand must NOT be hoisted out of acc.parallel as a whole,
+// even though the scf.if itself yields a trivial type (i64).
+// CHECK-LABEL:   func.func @test_parallel_transitive_canMoveOutOf(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>,
+// CHECK-SAME:      %[[COND:.*]]: i1)
+// CHECK:         scf.for
+// CHECK:           acc.parallel {
+// The scf.if is hoisted from acc.loop into acc.parallel, but NOT out of
+// acc.parallel, because the nested fir.convert has a non-trivial operand type:
+// CHECK:             %[[ADDR:.*]] = scf.if %[[COND]] -> (i64)
+// CHECK:               fir.convert %[[ARG0]]
+// CHECK:             acc.loop
+func.func @test_parallel_transitive_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>, %cond: i1) {
+  %c0_i64 = arith.constant 0 : i64
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %c1_i32 = arith.constant 1 : i32
+  %c10_i32 = arith.constant 10 : i32
+  %cst = arith.constant 1.000000e+00 : f32
+  scf.for %i = %c0 to %c10 step %c1 {
+    acc.parallel {
+      acc.loop control(%iv : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+        %addr = scf.if %cond -> (i64) {
+          %ptr = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> i64
+          scf.yield %ptr : i64
+        } else {
+          scf.yield %c0_i64 : i64
+        }
+        %ref = fir.convert %addr : (i64) -> !fir.ref<f32>
+        fir.store %cst to %ref : !fir.ref<f32>
+        acc.yield
+      } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+      acc.yield
+    }
+  }
+  return
+}


        


More information about the flang-commits mailing list