[flang-commits] [flang] 91981a5 - [flang] Fixed operations hoisting out of fir.do_concurrent. (#173502)

via flang-commits flang-commits at lists.llvm.org
Tue Dec 30 10:27:35 PST 2025


Author: Slava Zakharin
Date: 2025-12-30T10:27:31-08:00
New Revision: 91981a5736d0b414c9ef34edeb9dc3a9b6dd7c4f

URL: https://github.com/llvm/llvm-project/commit/91981a5736d0b414c9ef34edeb9dc3a9b6dd7c4f
DIFF: https://github.com/llvm/llvm-project/commit/91981a5736d0b414c9ef34edeb9dc3a9b6dd7c4f.diff

LOG: [flang] Fixed operations hoisting out of fir.do_concurrent. (#173502)

LICM (#173438) may insert new operations at the beginning of
`fir.do_concurrent`'s block, and they cannot always be hoisted
to the alloca-block of the parent operation. This patch
moves only `fir.alloca`s into the alloca-block, and moves
all other operations to right before `fir.do_concurrent`.

Added: 
    

Modified: 
    flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
    flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir
    flang/test/Transforms/do-concurrent-localizer-init-region.fir
    flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
index 03f97ebdc6352..6c754f226e26f 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
@@ -254,6 +254,10 @@ class DoConcurrentConversion
 
     // Collect iteration variable(s) allocations so that we can move them
     // outside the `fir.do_concurrent` wrapper.
+    // There may actually be more operations than just allocations
+    // at the beginning of the wrapper block, e.g. LICM may move
+    // some operations from the inner fir.do_concurrent.loop into
+    // this block.
     llvm::SmallVector<mlir::Operation *> opsToMove;
     for (mlir::Operation &op : llvm::drop_end(wrapperBlock))
       opsToMove.push_back(&op);
@@ -262,8 +266,13 @@ class DoConcurrentConversion
         rewriter, doConcurentOp->getParentOfType<mlir::ModuleOp>());
     auto *allocIt = firBuilder.getAllocaBlock();
 
+    // Move alloca operations into the alloca-block, and all other
+    // operations - right before fir.do_concurrent.
     for (mlir::Operation *op : llvm::reverse(opsToMove))
-      rewriter.moveOpBefore(op, allocIt, allocIt->begin());
+      if (mlir::isa<fir::AllocaOp>(op))
+        rewriter.moveOpBefore(op, allocIt, allocIt->begin());
+      else
+        rewriter.moveOpBefore(op, doConcurentOp);
 
     rewriter.setInsertionPointAfter(doConcurentOp);
     fir::DoLoopOp innermostUnorderdLoop;

diff  --git a/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir b/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir
index b59ffdfb34adf..ccc5f25446281 100644
--- a/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir
+++ b/flang/test/Transforms/do-concurrent-localizer-dealloc-region.fir
@@ -71,7 +71,6 @@ func.func @_QPlocalizer_with_dealloc_region(%arg0: !fir.ref<i32> {fir.bindc_name
 // CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
 // CHECK:           %[[VAL_3:.*]] = arith.constant 42 : i32
 // CHECK:           %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i"}
-// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_4]] {uniq_name = "_QFlocalizer_with_dealloc_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
 // CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
 // CHECK:           %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
 // CHECK:           %[[VAL_8:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFlocalizer_with_dealloc_regionEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
@@ -84,6 +83,7 @@ func.func @_QPlocalizer_with_dealloc_region(%arg0: !fir.ref<i32> {fir.bindc_name
 // CHECK:           %[[VAL_15:.*]] = fir.declare %[[VAL_13]](%[[VAL_14]]) {uniq_name = "_QFlocalizer_with_dealloc_regionEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xi32>>
 // CHECK:           %[[VAL_16:.*]] = fir.embox %[[VAL_15]](%[[VAL_14]]) : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
 // CHECK:           fir.store %[[VAL_16]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_4]] {uniq_name = "_QFlocalizer_with_dealloc_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
 // CHECK:           fir.do_loop %[[VAL_17:.*]] = %[[VAL_2]] to %[[VAL_10]] step %[[VAL_2]] unordered {
 
 // Local allocation

diff  --git a/flang/test/Transforms/do-concurrent-localizer-init-region.fir b/flang/test/Transforms/do-concurrent-localizer-init-region.fir
index ebb56aec278f6..0e31d7147ec5f 100644
--- a/flang/test/Transforms/do-concurrent-localizer-init-region.fir
+++ b/flang/test/Transforms/do-concurrent-localizer-init-region.fir
@@ -58,7 +58,6 @@ func.func @_QPlocalizer_with_init_region() {
 // CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
 // CHECK:           %[[VAL_2:.*]] = arith.constant 42 : i32
 // CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "i"}
-// CHECK:           %[[VAL_4:.*]] = fir.declare %[[VAL_3]] {uniq_name = "_QFlocalizer_with_init_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
 // CHECK:           %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
 // CHECK:           %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFlocalizer_with_init_regionEn"}
 // CHECK:           %[[VAL_7:.*]] = fir.declare %[[VAL_6]] {uniq_name = "_QFlocalizer_with_init_regionEn"} : (!fir.ref<i32>) -> !fir.ref<i32>
@@ -70,6 +69,7 @@ func.func @_QPlocalizer_with_init_region() {
 // CHECK:           %[[VAL_12:.*]] = fir.declare %[[VAL_8]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFlocalizer_with_init_regionEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
 // CHECK:           %[[VAL_13:.*]] = fir.load %[[VAL_7]] : !fir.ref<i32>
 // CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index
+// CHECK:           %[[VAL_4:.*]] = fir.declare %[[VAL_3]] {uniq_name = "_QFlocalizer_with_init_regionEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
 // CHECK:           fir.do_loop %[[VAL_15:.*]] = %[[VAL_1]] to %[[VAL_14]] step %[[VAL_1]] unordered {
 
 // Local allocation

diff  --git a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
index c550ab8a97d4c..eb04e7e58bec9 100644
--- a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
+++ b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
@@ -182,3 +182,46 @@ func.func @do_concurrent_locality_specs() {
 // CHECK:           }
 // CHECK:           return
 // CHECK:         }
+
+// -----
+
+// Check that moving operations out of fir.do_concurrent
+// does not result in %newval2 definition being inserted
+// before %newval1 definition. Definitions like %newval2
+// may appear after LICM.
+func.func @after_licm(%i_lb: index, %i_ub: index, %i_st: index, %val : i32) {
+  %c1_i32 = arith.constant 1 : i32
+  %c7_i32 = arith.constant 7 : i32
+  cf.br ^bb1
+^bb1:
+  %newval1 = arith.muli %val, %c7_i32 : i32
+  fir.do_concurrent {
+    %i = fir.alloca i32
+    %newval2 = arith.subi %newval1, %c1_i32 : i32
+    fir.do_concurrent.loop (%i_iv) = (%i_lb) to (%i_ub) step (%i_st) {
+      %0 = fir.convert %i_iv : (index) -> i32
+      %1 = arith.addi %newval2, %0 : i32
+      fir.store %1 to %i : !fir.ref<i32>
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @after_licm(
+// CHECK-SAME:      %[[ARG0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME:      %[[ARG1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME:      %[[ARG2:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME:      %[[ARG3:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: i32) {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 7 : i32
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1 : i32
+// CHECK:           %[[ALLOCA_0:.*]] = fir.alloca i32
+// CHECK:           cf.br ^bb1
+// CHECK:         ^bb1:
+// CHECK:           %[[MULI_0:.*]] = arith.muli %[[ARG3]], %[[CONSTANT_0]] : i32
+// CHECK:           %[[SUBI_0:.*]] = arith.subi %[[MULI_0]], %[[CONSTANT_1]] : i32
+// CHECK:           fir.do_loop %[[VAL_0:.*]] = %[[ARG0]] to %[[ARG1]] step %[[ARG2]] unordered {
+// CHECK:             %[[CONVERT_0:.*]] = fir.convert %[[VAL_0]] : (index) -> i32
+// CHECK:             %[[ADDI_0:.*]] = arith.addi %[[SUBI_0]], %[[CONVERT_0]] : i32
+// CHECK:             fir.store %[[ADDI_0]] to %[[ALLOCA_0]] : !fir.ref<i32>
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }


        


More information about the flang-commits mailing list