[flang-commits] [flang] [Flang][OpenMP] Skip DSA for canonical loops (PR #150593)

Michael Kruse via flang-commits flang-commits at lists.llvm.org
Mon Jul 28 07:22:41 PDT 2025


https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/150593

>From a9f13b711bffc91ae037833a2cbf91954e7c75ae Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Fri, 25 Jul 2025 09:53:24 +0200
Subject: [PATCH 1/2] Remove privatization

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 35 ++++---
 .../test/Lower/OpenMP/unroll-heuristic01.f90  | 63 +++++++-----
 .../test/Lower/OpenMP/unroll-heuristic02.f90  | 98 +++++++++----------
 .../test/Lower/OpenMP/unroll-heuristic03.f90  | 61 ++++++++++++
 4 files changed, 167 insertions(+), 90 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/unroll-heuristic03.f90

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fc5fef9b2c577..b232f5bdbd6ff 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1138,6 +1138,11 @@ struct OpWithBodyGenInfo {
     return *this;
   }
 
+  OpWithBodyGenInfo &setPrivatize(bool value) {
+    privatize = value;
+    return *this;
+  }
+
   /// [inout] converter to use for the clauses.
   lower::AbstractConverter &converter;
   /// [in] Symbol table
@@ -1164,6 +1169,8 @@ struct OpWithBodyGenInfo {
   /// [in] if set to `true`, skip generating nested evaluations and dispatching
   /// any further leaf constructs.
   bool genSkeletonOnly = false;
+  /// [in] enables handling of privatized variable unless set to `false`.
+  bool privatize = true;
 };
 
 /// Create the body (block) for an OpenMP Operation.
@@ -1224,7 +1231,7 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
   // code will use the right symbols.
   bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) ==
                 llvm::omp::Association::Loop;
-  bool privatize = info.clauses;
+  bool privatize = info.clauses && info.privatize;
 
   firOpBuilder.setInsertionPoint(marker);
   std::optional<DataSharingProcessor> tempDsp;
@@ -2098,7 +2105,7 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
                    const ConstructQueue &queue,
                    ConstructQueue::const_iterator item,
                    llvm::ArrayRef<const semantics::Symbol *> ivs,
-                   llvm::omp::Directive directive, DataSharingProcessor &dsp) {
+                   llvm::omp::Directive directive) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
   assert(ivs.size() == 1 && "Nested loops not yet implemented");
@@ -2191,10 +2198,17 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
     mlir::Value userVal =
         firOpBuilder.create<mlir::arith::AddIOp>(loc, loopLBVar, scaled);
 
-    // The argument is not currently in memory, so make a temporary for the
-    // argument, and store it there, then bind that location to the argument.
+    mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
+    firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
+    mlir::Type tempTy = converter.genType(*iv);
+    firOpBuilder.restoreInsertionPoint(insPt);
+
+    // Write loop value to loop variable
+    mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, userVal);
+    hlfir::Entity lhs{converter.getSymbolAddress(*iv)};
+    lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);
     mlir::Operation *storeOp =
-        createAndSetPrivatizedLoopVar(converter, loc, userVal, iv);
+        hlfir::AssignOp::create(firOpBuilder, loc, cvtVal, lhs);
 
     firOpBuilder.setInsertionPointAfter(storeOp);
     return {iv};
@@ -2205,7 +2219,7 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
                         directive)
           .setClauses(&item->clauses)
-          .setDataSharingProcessor(&dsp)
+          .setPrivatize(false)
           .setGenRegionEntryCb(ivCallback),
       queue, item, tripcount, cli);
 
@@ -2231,17 +2245,10 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
   cp.processTODO<clause::Partial, clause::Full>(
       loc, llvm::omp::Directive::OMPD_unroll);
 
-  // Even though unroll does not support data-sharing clauses, but this is
-  // required to fill the symbol table.
-  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
-                           /*shouldCollectPreDeterminedSymbols=*/true,
-                           /*useDelayedPrivatization=*/false, symTable);
-  dsp.processStep1();
-
   // Emit the associated loop
   auto canonLoop =
       genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item,
-                         iv, llvm::omp::Directive::OMPD_unroll, dsp);
+                         iv, llvm::omp::Directive::OMPD_unroll);
 
   // Apply unrolling to it
   auto cli = canonLoop.getCli();
diff --git a/flang/test/Lower/OpenMP/unroll-heuristic01.f90 b/flang/test/Lower/OpenMP/unroll-heuristic01.f90
index a5f5c003b8a7c..34020eb727e55 100644
--- a/flang/test/Lower/OpenMP/unroll-heuristic01.f90
+++ b/flang/test/Lower/OpenMP/unroll-heuristic01.f90
@@ -13,27 +13,42 @@ subroutine omp_unroll_heuristic01(lb, ub, inc)
 end subroutine omp_unroll_heuristic01
 
 
-!CHECK-LABEL: func.func @_QPomp_unroll_heuristic01(
-!CHECK:      %c0_i32 = arith.constant 0 : i32
-!CHECK-NEXT: %c1_i32 = arith.constant 1 : i32
-!CHECK-NEXT: %13 = arith.cmpi slt, %12, %c0_i32 : i32
-!CHECK-NEXT: %14 = arith.subi %c0_i32, %12 : i32
-!CHECK-NEXT: %15 = arith.select %13, %14, %12 : i32
-!CHECK-NEXT: %16 = arith.select %13, %11, %10 : i32
-!CHECK-NEXT: %17 = arith.select %13, %10, %11 : i32
-!CHECK-NEXT: %18 = arith.subi %17, %16 overflow<nuw> : i32
-!CHECK-NEXT: %19 = arith.divui %18, %15 : i32
-!CHECK-NEXT: %20 = arith.addi %19, %c1_i32 overflow<nuw> : i32
-!CHECK-NEXT: %21 = arith.cmpi slt, %17, %16 : i32
-!CHECK-NEXT: %22 = arith.select %21, %c0_i32, %20 : i32
-!CHECK-NEXT: %canonloop_s0 = omp.new_cli
-!CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%22) {
-!CHECK-NEXT:   %23 = arith.muli %iv, %12 : i32
-!CHECK-NEXT:   %24 = arith.addi %10, %23 : i32
-!CHECK-NEXT:   hlfir.assign %24 to %9#0 : i32, !fir.ref<i32>
-!CHECK-NEXT:   %25 = fir.load %9#0 : !fir.ref<i32>
-!CHECK-NEXT:   hlfir.assign %25 to %6#0 : i32, !fir.ref<i32>
-!CHECK-NEXT:   omp.terminator
-!CHECK-NEXT: }
-!CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0)
-!CHECK-NEXT: return
+! CHECK-LABEL:   func.func @_QPomp_unroll_heuristic01(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"},
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"},
+! CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) {
+! CHECK:           %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_unroll_heuristic01Ei"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_unroll_heuristic01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Einc"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Elb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic01Eres"}
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_unroll_heuristic01Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Eub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+! CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:           %[[VAL_10:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK:           %[[VAL_11:.*]] = arith.constant 0 : i32
+! CHECK:           %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_13:.*]] = arith.cmpi slt, %[[VAL_10]], %[[VAL_11]] : i32
+! CHECK:           %[[VAL_14:.*]] = arith.subi %[[VAL_11]], %[[VAL_10]] : i32
+! CHECK:           %[[VAL_15:.*]] = arith.select %[[VAL_13]], %[[VAL_14]], %[[VAL_10]] : i32
+! CHECK:           %[[VAL_16:.*]] = arith.select %[[VAL_13]], %[[VAL_9]], %[[VAL_8]] : i32
+! CHECK:           %[[VAL_17:.*]] = arith.select %[[VAL_13]], %[[VAL_8]], %[[VAL_9]] : i32
+! CHECK:           %[[VAL_18:.*]] = arith.subi %[[VAL_17]], %[[VAL_16]] overflow<nuw> : i32
+! CHECK:           %[[VAL_19:.*]] = arith.divui %[[VAL_18]], %[[VAL_15]] : i32
+! CHECK:           %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_12]] overflow<nuw> : i32
+! CHECK:           %[[VAL_21:.*]] = arith.cmpi slt, %[[VAL_17]], %[[VAL_16]] : i32
+! CHECK:           %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_11]], %[[VAL_20]] : i32
+! CHECK:           %[[VAL_23:.*]] = omp.new_cli
+! CHECK:           omp.canonical_loop(%[[VAL_23]]) %[[VAL_24:.*]] : i32 in range(%[[VAL_22]]) {
+! CHECK:             %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[VAL_10]] : i32
+! CHECK:             %[[VAL_26:.*]] = arith.addi %[[VAL_8]], %[[VAL_25]] : i32
+! CHECK:             hlfir.assign %[[VAL_26]] to %[[VAL_2]]#0 : i32, !fir.ref<i32>
+! CHECK:             %[[VAL_27:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+! CHECK:             hlfir.assign %[[VAL_27]] to %[[VAL_6]]#0 : i32, !fir.ref<i32>
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           omp.unroll_heuristic(%[[VAL_23]])
+! CHECK:           return
+! CHECK:         }
\ No newline at end of file
diff --git a/flang/test/Lower/OpenMP/unroll-heuristic02.f90 b/flang/test/Lower/OpenMP/unroll-heuristic02.f90
index 14f694d6cdb78..fdb1366960b23 100644
--- a/flang/test/Lower/OpenMP/unroll-heuristic02.f90
+++ b/flang/test/Lower/OpenMP/unroll-heuristic02.f90
@@ -37,61 +37,55 @@ end subroutine omp_unroll_heuristic_nested02
 !CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic_nested02Eouter_ub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:           %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic_nested02Eres"}
 !CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFomp_unroll_heuristic_nested02Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:           %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ei"}
-!CHECK:           %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:           %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ej"}
-!CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:           %[[VAL_17:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
-!CHECK:           %[[VAL_18:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref<i32>
-!CHECK:           %[[VAL_19:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-!CHECK:           %[[VAL_20:.*]] = arith.constant 0 : i32
-!CHECK:           %[[VAL_21:.*]] = arith.constant 1 : i32
-!CHECK:           %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_19]], %[[VAL_20]] : i32
-!CHECK:           %[[VAL_23:.*]] = arith.subi %[[VAL_20]], %[[VAL_19]] : i32
-!CHECK:           %[[VAL_24:.*]] = arith.select %[[VAL_22]], %[[VAL_23]], %[[VAL_19]] : i32
-!CHECK:           %[[VAL_25:.*]] = arith.select %[[VAL_22]], %[[VAL_18]], %[[VAL_17]] : i32
-!CHECK:           %[[VAL_26:.*]] = arith.select %[[VAL_22]], %[[VAL_17]], %[[VAL_18]] : i32
-!CHECK:           %[[VAL_27:.*]] = arith.subi %[[VAL_26]], %[[VAL_25]] overflow<nuw> : i32
-!CHECK:           %[[VAL_28:.*]] = arith.divui %[[VAL_27]], %[[VAL_24]] : i32
-!CHECK:           %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_21]] overflow<nuw> : i32
-!CHECK:           %[[VAL_30:.*]] = arith.cmpi slt, %[[VAL_26]], %[[VAL_25]] : i32
-!CHECK:           %[[VAL_31:.*]] = arith.select %[[VAL_30]], %[[VAL_20]], %[[VAL_29]] : i32
-!CHECK:           %[[VAL_32:.*]] = omp.new_cli
-!CHECK:           omp.canonical_loop(%[[VAL_32]]) %[[VAL_33:.*]] : i32 in range(%[[VAL_31]]) {
-!CHECK:             %[[VAL_34:.*]] = arith.muli %[[VAL_33]], %[[VAL_19]] : i32
-!CHECK:             %[[VAL_35:.*]] = arith.addi %[[VAL_17]], %[[VAL_34]] : i32
-!CHECK:             hlfir.assign %[[VAL_35]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-!CHECK:             %[[VAL_36:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ej"}
-!CHECK:             %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_36]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK:             %[[VAL_38:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
-!CHECK:             %[[VAL_39:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
-!CHECK:             %[[VAL_40:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
-!CHECK:             %[[VAL_41:.*]] = arith.constant 0 : i32
-!CHECK:             %[[VAL_42:.*]] = arith.constant 1 : i32
-!CHECK:             %[[VAL_43:.*]] = arith.cmpi slt, %[[VAL_40]], %[[VAL_41]] : i32
-!CHECK:             %[[VAL_44:.*]] = arith.subi %[[VAL_41]], %[[VAL_40]] : i32
-!CHECK:             %[[VAL_45:.*]] = arith.select %[[VAL_43]], %[[VAL_44]], %[[VAL_40]] : i32
-!CHECK:             %[[VAL_46:.*]] = arith.select %[[VAL_43]], %[[VAL_39]], %[[VAL_38]] : i32
-!CHECK:             %[[VAL_47:.*]] = arith.select %[[VAL_43]], %[[VAL_38]], %[[VAL_39]] : i32
-!CHECK:             %[[VAL_48:.*]] = arith.subi %[[VAL_47]], %[[VAL_46]] overflow<nuw> : i32
-!CHECK:             %[[VAL_49:.*]] = arith.divui %[[VAL_48]], %[[VAL_45]] : i32
-!CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_42]] overflow<nuw> : i32
-!CHECK:             %[[VAL_51:.*]] = arith.cmpi slt, %[[VAL_47]], %[[VAL_46]] : i32
-!CHECK:             %[[VAL_52:.*]] = arith.select %[[VAL_51]], %[[VAL_41]], %[[VAL_50]] : i32
-!CHECK:             %[[VAL_53:.*]] = omp.new_cli
-!CHECK:             omp.canonical_loop(%[[VAL_53]]) %[[VAL_54:.*]] : i32 in range(%[[VAL_52]]) {
-!CHECK:               %[[VAL_55:.*]] = arith.muli %[[VAL_54]], %[[VAL_40]] : i32
-!CHECK:               %[[VAL_56:.*]] = arith.addi %[[VAL_38]], %[[VAL_55]] : i32
-!CHECK:               hlfir.assign %[[VAL_56]] to %[[VAL_37]]#0 : i32, !fir.ref<i32>
-!CHECK:               %[[VAL_57:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-!CHECK:               %[[VAL_58:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<i32>
-!CHECK:               %[[VAL_59:.*]] = arith.addi %[[VAL_57]], %[[VAL_58]] : i32
-!CHECK:               hlfir.assign %[[VAL_59]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+!CHECK:           %[[VAL_13:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
+!CHECK:           %[[VAL_14:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref<i32>
+!CHECK:           %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+!CHECK:           %[[VAL_16:.*]] = arith.constant 0 : i32
+!CHECK:           %[[VAL_17:.*]] = arith.constant 1 : i32
+!CHECK:           %[[VAL_18:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_16]] : i32
+!CHECK:           %[[VAL_19:.*]] = arith.subi %[[VAL_16]], %[[VAL_15]] : i32
+!CHECK:           %[[VAL_20:.*]] = arith.select %[[VAL_18]], %[[VAL_19]], %[[VAL_15]] : i32
+!CHECK:           %[[VAL_21:.*]] = arith.select %[[VAL_18]], %[[VAL_14]], %[[VAL_13]] : i32
+!CHECK:           %[[VAL_22:.*]] = arith.select %[[VAL_18]], %[[VAL_13]], %[[VAL_14]] : i32
+!CHECK:           %[[VAL_23:.*]] = arith.subi %[[VAL_22]], %[[VAL_21]] overflow<nuw> : i32
+!CHECK:           %[[VAL_24:.*]] = arith.divui %[[VAL_23]], %[[VAL_20]] : i32
+!CHECK:           %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_17]] overflow<nuw> : i32
+!CHECK:           %[[VAL_26:.*]] = arith.cmpi slt, %[[VAL_22]], %[[VAL_21]] : i32
+!CHECK:           %[[VAL_27:.*]] = arith.select %[[VAL_26]], %[[VAL_16]], %[[VAL_25]] : i32
+!CHECK:           %[[VAL_28:.*]] = omp.new_cli
+!CHECK:           omp.canonical_loop(%[[VAL_28]]) %[[VAL_29:.*]] : i32 in range(%[[VAL_27]]) {
+!CHECK:             %[[VAL_30:.*]] = arith.muli %[[VAL_29]], %[[VAL_15]] : i32
+!CHECK:             %[[VAL_31:.*]] = arith.addi %[[VAL_13]], %[[VAL_30]] : i32
+!CHECK:             hlfir.assign %[[VAL_31]] to %[[VAL_2]]#0 : i32, !fir.ref<i32>
+!CHECK:             %[[VAL_32:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+!CHECK:             %[[VAL_33:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+!CHECK:             %[[VAL_34:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+!CHECK:             %[[VAL_35:.*]] = arith.constant 0 : i32
+!CHECK:             %[[VAL_36:.*]] = arith.constant 1 : i32
+!CHECK:             %[[VAL_37:.*]] = arith.cmpi slt, %[[VAL_34]], %[[VAL_35]] : i32
+!CHECK:             %[[VAL_38:.*]] = arith.subi %[[VAL_35]], %[[VAL_34]] : i32
+!CHECK:             %[[VAL_39:.*]] = arith.select %[[VAL_37]], %[[VAL_38]], %[[VAL_34]] : i32
+!CHECK:             %[[VAL_40:.*]] = arith.select %[[VAL_37]], %[[VAL_33]], %[[VAL_32]] : i32
+!CHECK:             %[[VAL_41:.*]] = arith.select %[[VAL_37]], %[[VAL_32]], %[[VAL_33]] : i32
+!CHECK:             %[[VAL_42:.*]] = arith.subi %[[VAL_41]], %[[VAL_40]] overflow<nuw> : i32
+!CHECK:             %[[VAL_43:.*]] = arith.divui %[[VAL_42]], %[[VAL_39]] : i32
+!CHECK:             %[[VAL_44:.*]] = arith.addi %[[VAL_43]], %[[VAL_36]] overflow<nuw> : i32
+!CHECK:             %[[VAL_45:.*]] = arith.cmpi slt, %[[VAL_41]], %[[VAL_40]] : i32
+!CHECK:             %[[VAL_46:.*]] = arith.select %[[VAL_45]], %[[VAL_35]], %[[VAL_44]] : i32
+!CHECK:             %[[VAL_47:.*]] = omp.new_cli
+!CHECK:             omp.canonical_loop(%[[VAL_47]]) %[[VAL_48:.*]] : i32 in range(%[[VAL_46]]) {
+!CHECK:               %[[VAL_49:.*]] = arith.muli %[[VAL_48]], %[[VAL_34]] : i32
+!CHECK:               %[[VAL_50:.*]] = arith.addi %[[VAL_32]], %[[VAL_49]] : i32
+!CHECK:               hlfir.assign %[[VAL_50]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
+!CHECK:               %[[VAL_51:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+!CHECK:               %[[VAL_52:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+!CHECK:               %[[VAL_53:.*]] = arith.addi %[[VAL_51]], %[[VAL_52]] : i32
+!CHECK:               hlfir.assign %[[VAL_53]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
 !CHECK:               omp.terminator
 !CHECK:             }
-!CHECK:             omp.unroll_heuristic(%[[VAL_53]])
+!CHECK:             omp.unroll_heuristic(%[[VAL_47]])
 !CHECK:             omp.terminator
 !CHECK:           }
-!CHECK:           omp.unroll_heuristic(%[[VAL_32]])
+!CHECK:           omp.unroll_heuristic(%[[VAL_28]])
 !CHECK:           return
 !CHECK:         }
diff --git a/flang/test/Lower/OpenMP/unroll-heuristic03.f90 b/flang/test/Lower/OpenMP/unroll-heuristic03.f90
new file mode 100644
index 0000000000000..308c149c260dc
--- /dev/null
+++ b/flang/test/Lower/OpenMP/unroll-heuristic03.f90
@@ -0,0 +1,61 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
+
+! Test implicitly privatized loop variable that is affected by unrolling.
+
+subroutine omp_unroll_heuristic03(lb, ub, inc)
+  integer res, i, lb, ub, inc
+
+  !$omp parallel
+    !$omp unroll
+    do i = lb, ub, inc
+      res = i
+    end do
+    !$omp end unroll
+  !$omp end parallel
+
+end subroutine omp_unroll_heuristic03
+
+
+! CHECK-LABEL:   func.func @_QPomp_unroll_heuristic03(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"},
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"},
+! CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) {
+! CHECK:           %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_unroll_heuristic03Ei"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_unroll_heuristic03Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Einc"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Elb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic03Eres"}
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_unroll_heuristic03Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Eub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           omp.parallel private(@_QFomp_unroll_heuristic03Ei_private_i32 %[[VAL_2]]#0 -> %[[VAL_8:.*]] : !fir.ref<i32>) {
+! CHECK:             %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFomp_unroll_heuristic03Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_10:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.constant 0 : i32
+! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_15:.*]] = arith.cmpi slt, %[[VAL_12]], %[[VAL_13]] : i32
+! CHECK:             %[[VAL_16:.*]] = arith.subi %[[VAL_13]], %[[VAL_12]] : i32
+! CHECK:             %[[VAL_17:.*]] = arith.select %[[VAL_15]], %[[VAL_16]], %[[VAL_12]] : i32
+! CHECK:             %[[VAL_18:.*]] = arith.select %[[VAL_15]], %[[VAL_11]], %[[VAL_10]] : i32
+! CHECK:             %[[VAL_19:.*]] = arith.select %[[VAL_15]], %[[VAL_10]], %[[VAL_11]] : i32
+! CHECK:             %[[VAL_20:.*]] = arith.subi %[[VAL_19]], %[[VAL_18]] overflow<nuw> : i32
+! CHECK:             %[[VAL_21:.*]] = arith.divui %[[VAL_20]], %[[VAL_17]] : i32
+! CHECK:             %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_14]] overflow<nuw> : i32
+! CHECK:             %[[VAL_23:.*]] = arith.cmpi slt, %[[VAL_19]], %[[VAL_18]] : i32
+! CHECK:             %[[VAL_24:.*]] = arith.select %[[VAL_23]], %[[VAL_13]], %[[VAL_22]] : i32
+! CHECK:             %[[VAL_25:.*]] = omp.new_cli
+! CHECK:             omp.canonical_loop(%[[VAL_25]]) %[[VAL_26:.*]] : i32 in range(%[[VAL_24]]) {
+! CHECK:               %[[VAL_27:.*]] = arith.muli %[[VAL_26]], %[[VAL_12]] : i32
+! CHECK:               %[[VAL_28:.*]] = arith.addi %[[VAL_10]], %[[VAL_27]] : i32
+! CHECK:               hlfir.assign %[[VAL_28]] to %[[VAL_9]]#0 : i32, !fir.ref<i32>
+! CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
+! CHECK:               hlfir.assign %[[VAL_29]] to %[[VAL_6]]#0 : i32, !fir.ref<i32>
+! CHECK:               omp.terminator
+! CHECK:             }
+! CHECK:             omp.unroll_heuristic(%[[VAL_25]])
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
\ No newline at end of file

>From 9b654f98095325290159c84f95bef8ba8bca0f60 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Mon, 28 Jul 2025 16:13:31 +0200
Subject: [PATCH 2/2] Avoid code duplication

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 34 ++++++++++++++-----------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index b232f5bdbd6ff..8cfcc212a0083 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -712,20 +712,16 @@ static void threadPrivatizeVars(lower::AbstractConverter &converter,
   }
 }
 
-static mlir::Operation *
-createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
-                              mlir::Location loc, mlir::Value indexVal,
-                              const semantics::Symbol *sym) {
+static mlir::Operation *setLoopVar(lower::AbstractConverter &converter,
+                                   mlir::Location loc, mlir::Value indexVal,
+                                   const semantics::Symbol *sym) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
   mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
   firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
-
   mlir::Type tempTy = converter.genType(*sym);
-
-  assert(converter.isPresentShallowLookup(*sym) &&
-         "Expected symbol to be in symbol table.");
-
   firOpBuilder.restoreInsertionPoint(insPt);
+
   mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal);
   hlfir::Entity lhs{converter.getSymbolAddress(*sym)};
 
@@ -736,6 +732,15 @@ createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
   return storeOp;
 }
 
+static mlir::Operation *
+createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
+                              mlir::Location loc, mlir::Value indexVal,
+                              const semantics::Symbol *sym) {
+  assert(converter.isPresentShallowLookup(*sym) &&
+         "Expected symbol to be in symbol table.");
+  return setLoopVar(converter, loc, indexVal, sym);
+}
+
 // This helper function implements the functionality of "promoting" non-CPTR
 // arguments of use_device_ptr to use_device_addr arguments (automagic
 // conversion of use_device_ptr -> use_device_addr in these cases). The way we
@@ -2198,17 +2203,8 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
     mlir::Value userVal =
         firOpBuilder.create<mlir::arith::AddIOp>(loc, loopLBVar, scaled);
 
-    mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
-    firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
-    mlir::Type tempTy = converter.genType(*iv);
-    firOpBuilder.restoreInsertionPoint(insPt);
-
     // Write loop value to loop variable
-    mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, userVal);
-    hlfir::Entity lhs{converter.getSymbolAddress(*iv)};
-    lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);
-    mlir::Operation *storeOp =
-        hlfir::AssignOp::create(firOpBuilder, loc, cvtVal, lhs);
+    mlir::Operation *storeOp = setLoopVar(converter, loc, userVal, iv);
 
     firOpBuilder.setInsertionPointAfter(storeOp);
     return {iv};



More information about the flang-commits mailing list