[flang-commits] [flang] [Flang][OpenMP] Skip DSA for canonical loops (PR #150593)
Michael Kruse via flang-commits
flang-commits at lists.llvm.org
Mon Jul 28 07:22:41 PDT 2025
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/150593
>From a9f13b711bffc91ae037833a2cbf91954e7c75ae Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Fri, 25 Jul 2025 09:53:24 +0200
Subject: [PATCH 1/2] Remove privatization
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 35 ++++---
.../test/Lower/OpenMP/unroll-heuristic01.f90 | 63 +++++++-----
.../test/Lower/OpenMP/unroll-heuristic02.f90 | 98 +++++++++----------
.../test/Lower/OpenMP/unroll-heuristic03.f90 | 61 ++++++++++++
4 files changed, 167 insertions(+), 90 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/unroll-heuristic03.f90
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fc5fef9b2c577..b232f5bdbd6ff 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1138,6 +1138,11 @@ struct OpWithBodyGenInfo {
return *this;
}
+ OpWithBodyGenInfo &setPrivatize(bool value) {
+ privatize = value;
+ return *this;
+ }
+
/// [inout] converter to use for the clauses.
lower::AbstractConverter &converter;
/// [in] Symbol table
@@ -1164,6 +1169,8 @@ struct OpWithBodyGenInfo {
/// [in] if set to `true`, skip generating nested evaluations and dispatching
/// any further leaf constructs.
bool genSkeletonOnly = false;
+ /// [in] enables handling of privatized variable unless set to `false`.
+ bool privatize = true;
};
/// Create the body (block) for an OpenMP Operation.
@@ -1224,7 +1231,7 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
// code will use the right symbols.
bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) ==
llvm::omp::Association::Loop;
- bool privatize = info.clauses;
+ bool privatize = info.clauses && info.privatize;
firOpBuilder.setInsertionPoint(marker);
std::optional<DataSharingProcessor> tempDsp;
@@ -2098,7 +2105,7 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
const ConstructQueue &queue,
ConstructQueue::const_iterator item,
llvm::ArrayRef<const semantics::Symbol *> ivs,
- llvm::omp::Directive directive, DataSharingProcessor &dsp) {
+ llvm::omp::Directive directive) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
assert(ivs.size() == 1 && "Nested loops not yet implemented");
@@ -2191,10 +2198,17 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
mlir::Value userVal =
firOpBuilder.create<mlir::arith::AddIOp>(loc, loopLBVar, scaled);
- // The argument is not currently in memory, so make a temporary for the
- // argument, and store it there, then bind that location to the argument.
+ mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
+ firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
+ mlir::Type tempTy = converter.genType(*iv);
+ firOpBuilder.restoreInsertionPoint(insPt);
+
+ // Write loop value to loop variable
+ mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, userVal);
+ hlfir::Entity lhs{converter.getSymbolAddress(*iv)};
+ lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);
mlir::Operation *storeOp =
- createAndSetPrivatizedLoopVar(converter, loc, userVal, iv);
+ hlfir::AssignOp::create(firOpBuilder, loc, cvtVal, lhs);
firOpBuilder.setInsertionPointAfter(storeOp);
return {iv};
@@ -2205,7 +2219,7 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
directive)
.setClauses(&item->clauses)
- .setDataSharingProcessor(&dsp)
+ .setPrivatize(false)
.setGenRegionEntryCb(ivCallback),
queue, item, tripcount, cli);
@@ -2231,17 +2245,10 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
cp.processTODO<clause::Partial, clause::Full>(
loc, llvm::omp::Directive::OMPD_unroll);
- // Even though unroll does not support data-sharing clauses, but this is
- // required to fill the symbol table.
- DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
- /*shouldCollectPreDeterminedSymbols=*/true,
- /*useDelayedPrivatization=*/false, symTable);
- dsp.processStep1();
-
// Emit the associated loop
auto canonLoop =
genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item,
- iv, llvm::omp::Directive::OMPD_unroll, dsp);
+ iv, llvm::omp::Directive::OMPD_unroll);
// Apply unrolling to it
auto cli = canonLoop.getCli();
diff --git a/flang/test/Lower/OpenMP/unroll-heuristic01.f90 b/flang/test/Lower/OpenMP/unroll-heuristic01.f90
index a5f5c003b8a7c..34020eb727e55 100644
--- a/flang/test/Lower/OpenMP/unroll-heuristic01.f90
+++ b/flang/test/Lower/OpenMP/unroll-heuristic01.f90
@@ -13,27 +13,42 @@ subroutine omp_unroll_heuristic01(lb, ub, inc)
end subroutine omp_unroll_heuristic01
-!CHECK-LABEL: func.func @_QPomp_unroll_heuristic01(
-!CHECK: %c0_i32 = arith.constant 0 : i32
-!CHECK-NEXT: %c1_i32 = arith.constant 1 : i32
-!CHECK-NEXT: %13 = arith.cmpi slt, %12, %c0_i32 : i32
-!CHECK-NEXT: %14 = arith.subi %c0_i32, %12 : i32
-!CHECK-NEXT: %15 = arith.select %13, %14, %12 : i32
-!CHECK-NEXT: %16 = arith.select %13, %11, %10 : i32
-!CHECK-NEXT: %17 = arith.select %13, %10, %11 : i32
-!CHECK-NEXT: %18 = arith.subi %17, %16 overflow<nuw> : i32
-!CHECK-NEXT: %19 = arith.divui %18, %15 : i32
-!CHECK-NEXT: %20 = arith.addi %19, %c1_i32 overflow<nuw> : i32
-!CHECK-NEXT: %21 = arith.cmpi slt, %17, %16 : i32
-!CHECK-NEXT: %22 = arith.select %21, %c0_i32, %20 : i32
-!CHECK-NEXT: %canonloop_s0 = omp.new_cli
-!CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%22) {
-!CHECK-NEXT: %23 = arith.muli %iv, %12 : i32
-!CHECK-NEXT: %24 = arith.addi %10, %23 : i32
-!CHECK-NEXT: hlfir.assign %24 to %9#0 : i32, !fir.ref<i32>
-!CHECK-NEXT: %25 = fir.load %9#0 : !fir.ref<i32>
-!CHECK-NEXT: hlfir.assign %25 to %6#0 : i32, !fir.ref<i32>
-!CHECK-NEXT: omp.terminator
-!CHECK-NEXT: }
-!CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0)
-!CHECK-NEXT: return
+! CHECK-LABEL: func.func @_QPomp_unroll_heuristic01(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"},
+! CHECK-SAME: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_unroll_heuristic01Ei"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_unroll_heuristic01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Einc"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Elb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic01Eres"}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_unroll_heuristic01Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic01Eub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.constant 0 : i32
+! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_13:.*]] = arith.cmpi slt, %[[VAL_10]], %[[VAL_11]] : i32
+! CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_11]], %[[VAL_10]] : i32
+! CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_13]], %[[VAL_14]], %[[VAL_10]] : i32
+! CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_13]], %[[VAL_9]], %[[VAL_8]] : i32
+! CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_13]], %[[VAL_8]], %[[VAL_9]] : i32
+! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]], %[[VAL_16]] overflow<nuw> : i32
+! CHECK: %[[VAL_19:.*]] = arith.divui %[[VAL_18]], %[[VAL_15]] : i32
+! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_12]] overflow<nuw> : i32
+! CHECK: %[[VAL_21:.*]] = arith.cmpi slt, %[[VAL_17]], %[[VAL_16]] : i32
+! CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_11]], %[[VAL_20]] : i32
+! CHECK: %[[VAL_23:.*]] = omp.new_cli
+! CHECK: omp.canonical_loop(%[[VAL_23]]) %[[VAL_24:.*]] : i32 in range(%[[VAL_22]]) {
+! CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[VAL_10]] : i32
+! CHECK: %[[VAL_26:.*]] = arith.addi %[[VAL_8]], %[[VAL_25]] : i32
+! CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_2]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+! CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_6]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.unroll_heuristic(%[[VAL_23]])
+! CHECK: return
+! CHECK: }
\ No newline at end of file
diff --git a/flang/test/Lower/OpenMP/unroll-heuristic02.f90 b/flang/test/Lower/OpenMP/unroll-heuristic02.f90
index 14f694d6cdb78..fdb1366960b23 100644
--- a/flang/test/Lower/OpenMP/unroll-heuristic02.f90
+++ b/flang/test/Lower/OpenMP/unroll-heuristic02.f90
@@ -37,61 +37,55 @@ end subroutine omp_unroll_heuristic_nested02
!CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic_nested02Eouter_ub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic_nested02Eres"}
!CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFomp_unroll_heuristic_nested02Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ei"}
-!CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ej"}
-!CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
-!CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref<i32>
-!CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-!CHECK: %[[VAL_20:.*]] = arith.constant 0 : i32
-!CHECK: %[[VAL_21:.*]] = arith.constant 1 : i32
-!CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_19]], %[[VAL_20]] : i32
-!CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_20]], %[[VAL_19]] : i32
-!CHECK: %[[VAL_24:.*]] = arith.select %[[VAL_22]], %[[VAL_23]], %[[VAL_19]] : i32
-!CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_22]], %[[VAL_18]], %[[VAL_17]] : i32
-!CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_22]], %[[VAL_17]], %[[VAL_18]] : i32
-!CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_26]], %[[VAL_25]] overflow<nuw> : i32
-!CHECK: %[[VAL_28:.*]] = arith.divui %[[VAL_27]], %[[VAL_24]] : i32
-!CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_28]], %[[VAL_21]] overflow<nuw> : i32
-!CHECK: %[[VAL_30:.*]] = arith.cmpi slt, %[[VAL_26]], %[[VAL_25]] : i32
-!CHECK: %[[VAL_31:.*]] = arith.select %[[VAL_30]], %[[VAL_20]], %[[VAL_29]] : i32
-!CHECK: %[[VAL_32:.*]] = omp.new_cli
-!CHECK: omp.canonical_loop(%[[VAL_32]]) %[[VAL_33:.*]] : i32 in range(%[[VAL_31]]) {
-!CHECK: %[[VAL_34:.*]] = arith.muli %[[VAL_33]], %[[VAL_19]] : i32
-!CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_17]], %[[VAL_34]] : i32
-!CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
-!CHECK: %[[VAL_36:.*]] = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ej"}
-!CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_36]] {uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-!CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
-!CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
-!CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
-!CHECK: %[[VAL_41:.*]] = arith.constant 0 : i32
-!CHECK: %[[VAL_42:.*]] = arith.constant 1 : i32
-!CHECK: %[[VAL_43:.*]] = arith.cmpi slt, %[[VAL_40]], %[[VAL_41]] : i32
-!CHECK: %[[VAL_44:.*]] = arith.subi %[[VAL_41]], %[[VAL_40]] : i32
-!CHECK: %[[VAL_45:.*]] = arith.select %[[VAL_43]], %[[VAL_44]], %[[VAL_40]] : i32
-!CHECK: %[[VAL_46:.*]] = arith.select %[[VAL_43]], %[[VAL_39]], %[[VAL_38]] : i32
-!CHECK: %[[VAL_47:.*]] = arith.select %[[VAL_43]], %[[VAL_38]], %[[VAL_39]] : i32
-!CHECK: %[[VAL_48:.*]] = arith.subi %[[VAL_47]], %[[VAL_46]] overflow<nuw> : i32
-!CHECK: %[[VAL_49:.*]] = arith.divui %[[VAL_48]], %[[VAL_45]] : i32
-!CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_42]] overflow<nuw> : i32
-!CHECK: %[[VAL_51:.*]] = arith.cmpi slt, %[[VAL_47]], %[[VAL_46]] : i32
-!CHECK: %[[VAL_52:.*]] = arith.select %[[VAL_51]], %[[VAL_41]], %[[VAL_50]] : i32
-!CHECK: %[[VAL_53:.*]] = omp.new_cli
-!CHECK: omp.canonical_loop(%[[VAL_53]]) %[[VAL_54:.*]] : i32 in range(%[[VAL_52]]) {
-!CHECK: %[[VAL_55:.*]] = arith.muli %[[VAL_54]], %[[VAL_40]] : i32
-!CHECK: %[[VAL_56:.*]] = arith.addi %[[VAL_38]], %[[VAL_55]] : i32
-!CHECK: hlfir.assign %[[VAL_56]] to %[[VAL_37]]#0 : i32, !fir.ref<i32>
-!CHECK: %[[VAL_57:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-!CHECK: %[[VAL_58:.*]] = fir.load %[[VAL_37]]#0 : !fir.ref<i32>
-!CHECK: %[[VAL_59:.*]] = arith.addi %[[VAL_57]], %[[VAL_58]] : i32
-!CHECK: hlfir.assign %[[VAL_59]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
+!CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref<i32>
+!CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+!CHECK: %[[VAL_16:.*]] = arith.constant 0 : i32
+!CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32
+!CHECK: %[[VAL_18:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_16]] : i32
+!CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_16]], %[[VAL_15]] : i32
+!CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_18]], %[[VAL_19]], %[[VAL_15]] : i32
+!CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_18]], %[[VAL_14]], %[[VAL_13]] : i32
+!CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_18]], %[[VAL_13]], %[[VAL_14]] : i32
+!CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_22]], %[[VAL_21]] overflow<nuw> : i32
+!CHECK: %[[VAL_24:.*]] = arith.divui %[[VAL_23]], %[[VAL_20]] : i32
+!CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_17]] overflow<nuw> : i32
+!CHECK: %[[VAL_26:.*]] = arith.cmpi slt, %[[VAL_22]], %[[VAL_21]] : i32
+!CHECK: %[[VAL_27:.*]] = arith.select %[[VAL_26]], %[[VAL_16]], %[[VAL_25]] : i32
+!CHECK: %[[VAL_28:.*]] = omp.new_cli
+!CHECK: omp.canonical_loop(%[[VAL_28]]) %[[VAL_29:.*]] : i32 in range(%[[VAL_27]]) {
+!CHECK: %[[VAL_30:.*]] = arith.muli %[[VAL_29]], %[[VAL_15]] : i32
+!CHECK: %[[VAL_31:.*]] = arith.addi %[[VAL_13]], %[[VAL_30]] : i32
+!CHECK: hlfir.assign %[[VAL_31]] to %[[VAL_2]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+!CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+!CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+!CHECK: %[[VAL_35:.*]] = arith.constant 0 : i32
+!CHECK: %[[VAL_36:.*]] = arith.constant 1 : i32
+!CHECK: %[[VAL_37:.*]] = arith.cmpi slt, %[[VAL_34]], %[[VAL_35]] : i32
+!CHECK: %[[VAL_38:.*]] = arith.subi %[[VAL_35]], %[[VAL_34]] : i32
+!CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_37]], %[[VAL_38]], %[[VAL_34]] : i32
+!CHECK: %[[VAL_40:.*]] = arith.select %[[VAL_37]], %[[VAL_33]], %[[VAL_32]] : i32
+!CHECK: %[[VAL_41:.*]] = arith.select %[[VAL_37]], %[[VAL_32]], %[[VAL_33]] : i32
+!CHECK: %[[VAL_42:.*]] = arith.subi %[[VAL_41]], %[[VAL_40]] overflow<nuw> : i32
+!CHECK: %[[VAL_43:.*]] = arith.divui %[[VAL_42]], %[[VAL_39]] : i32
+!CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_43]], %[[VAL_36]] overflow<nuw> : i32
+!CHECK: %[[VAL_45:.*]] = arith.cmpi slt, %[[VAL_41]], %[[VAL_40]] : i32
+!CHECK: %[[VAL_46:.*]] = arith.select %[[VAL_45]], %[[VAL_35]], %[[VAL_44]] : i32
+!CHECK: %[[VAL_47:.*]] = omp.new_cli
+!CHECK: omp.canonical_loop(%[[VAL_47]]) %[[VAL_48:.*]] : i32 in range(%[[VAL_46]]) {
+!CHECK: %[[VAL_49:.*]] = arith.muli %[[VAL_48]], %[[VAL_34]] : i32
+!CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_32]], %[[VAL_49]] : i32
+!CHECK: hlfir.assign %[[VAL_50]] to %[[VAL_7]]#0 : i32, !fir.ref<i32>
+!CHECK: %[[VAL_51:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+!CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+!CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_51]], %[[VAL_52]] : i32
+!CHECK: hlfir.assign %[[VAL_53]] to %[[VAL_12]]#0 : i32, !fir.ref<i32>
!CHECK: omp.terminator
!CHECK: }
-!CHECK: omp.unroll_heuristic(%[[VAL_53]])
+!CHECK: omp.unroll_heuristic(%[[VAL_47]])
!CHECK: omp.terminator
!CHECK: }
-!CHECK: omp.unroll_heuristic(%[[VAL_32]])
+!CHECK: omp.unroll_heuristic(%[[VAL_28]])
!CHECK: return
!CHECK: }
diff --git a/flang/test/Lower/OpenMP/unroll-heuristic03.f90 b/flang/test/Lower/OpenMP/unroll-heuristic03.f90
new file mode 100644
index 0000000000000..308c149c260dc
--- /dev/null
+++ b/flang/test/Lower/OpenMP/unroll-heuristic03.f90
@@ -0,0 +1,61 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
+
+! Test implicitly privatized loop variable that is affected by unrolling.
+
+subroutine omp_unroll_heuristic03(lb, ub, inc)
+ integer res, i, lb, ub, inc
+
+ !$omp parallel
+ !$omp unroll
+ do i = lb, ub, inc
+ res = i
+ end do
+ !$omp end unroll
+ !$omp end parallel
+
+end subroutine omp_unroll_heuristic03
+
+
+! CHECK-LABEL: func.func @_QPomp_unroll_heuristic03(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"},
+! CHECK-SAME: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_unroll_heuristic03Ei"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_unroll_heuristic03Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Einc"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Elb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_unroll_heuristic03Eres"}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_unroll_heuristic03Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_unroll_heuristic03Eub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.parallel private(@_QFomp_unroll_heuristic03Ei_private_i32 %[[VAL_2]]#0 -> %[[VAL_8:.*]] : !fir.ref<i32>) {
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFomp_unroll_heuristic03Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32
+! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_15:.*]] = arith.cmpi slt, %[[VAL_12]], %[[VAL_13]] : i32
+! CHECK: %[[VAL_16:.*]] = arith.subi %[[VAL_13]], %[[VAL_12]] : i32
+! CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_15]], %[[VAL_16]], %[[VAL_12]] : i32
+! CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_15]], %[[VAL_11]], %[[VAL_10]] : i32
+! CHECK: %[[VAL_19:.*]] = arith.select %[[VAL_15]], %[[VAL_10]], %[[VAL_11]] : i32
+! CHECK: %[[VAL_20:.*]] = arith.subi %[[VAL_19]], %[[VAL_18]] overflow<nuw> : i32
+! CHECK: %[[VAL_21:.*]] = arith.divui %[[VAL_20]], %[[VAL_17]] : i32
+! CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_14]] overflow<nuw> : i32
+! CHECK: %[[VAL_23:.*]] = arith.cmpi slt, %[[VAL_19]], %[[VAL_18]] : i32
+! CHECK: %[[VAL_24:.*]] = arith.select %[[VAL_23]], %[[VAL_13]], %[[VAL_22]] : i32
+! CHECK: %[[VAL_25:.*]] = omp.new_cli
+! CHECK: omp.canonical_loop(%[[VAL_25]]) %[[VAL_26:.*]] : i32 in range(%[[VAL_24]]) {
+! CHECK: %[[VAL_27:.*]] = arith.muli %[[VAL_26]], %[[VAL_12]] : i32
+! CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_10]], %[[VAL_27]] : i32
+! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_9]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
+! CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_6]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.unroll_heuristic(%[[VAL_25]])
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
\ No newline at end of file
>From 9b654f98095325290159c84f95bef8ba8bca0f60 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Mon, 28 Jul 2025 16:13:31 +0200
Subject: [PATCH 2/2] Avoid code duplication
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 34 ++++++++++++++-----------------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index b232f5bdbd6ff..8cfcc212a0083 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -712,20 +712,16 @@ static void threadPrivatizeVars(lower::AbstractConverter &converter,
}
}
-static mlir::Operation *
-createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
- mlir::Location loc, mlir::Value indexVal,
- const semantics::Symbol *sym) {
+static mlir::Operation *setLoopVar(lower::AbstractConverter &converter,
+ mlir::Location loc, mlir::Value indexVal,
+ const semantics::Symbol *sym) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
-
mlir::Type tempTy = converter.genType(*sym);
-
- assert(converter.isPresentShallowLookup(*sym) &&
- "Expected symbol to be in symbol table.");
-
firOpBuilder.restoreInsertionPoint(insPt);
+
mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal);
hlfir::Entity lhs{converter.getSymbolAddress(*sym)};
@@ -736,6 +732,15 @@ createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
return storeOp;
}
+static mlir::Operation *
+createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
+ mlir::Location loc, mlir::Value indexVal,
+ const semantics::Symbol *sym) {
+ assert(converter.isPresentShallowLookup(*sym) &&
+ "Expected symbol to be in symbol table.");
+ return setLoopVar(converter, loc, indexVal, sym);
+}
+
// This helper function implements the functionality of "promoting" non-CPTR
// arguments of use_device_ptr to use_device_addr arguments (automagic
// conversion of use_device_ptr -> use_device_addr in these cases). The way we
@@ -2198,17 +2203,8 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
mlir::Value userVal =
firOpBuilder.create<mlir::arith::AddIOp>(loc, loopLBVar, scaled);
- mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
- firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
- mlir::Type tempTy = converter.genType(*iv);
- firOpBuilder.restoreInsertionPoint(insPt);
-
// Write loop value to loop variable
- mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, userVal);
- hlfir::Entity lhs{converter.getSymbolAddress(*iv)};
- lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);
- mlir::Operation *storeOp =
- hlfir::AssignOp::create(firOpBuilder, loc, cvtVal, lhs);
+ mlir::Operation *storeOp = setLoopVar(converter, loc, userVal, iv);
firOpBuilder.setInsertionPointAfter(storeOp);
return {iv};
More information about the flang-commits
mailing list