[flang-commits] [flang] [mlir] [mlir][OpenMP] Fix update of linear iteration variables (PR #183800)
Leandro Lupori via flang-commits
flang-commits at lists.llvm.org
Fri Feb 27 10:55:01 PST 2026
https://github.com/luporl created https://github.com/llvm/llvm-project/pull/183800
The final value of a linear iteration variable must be the loop
limit_value + 1. Before this patch it was limit_value.
This fixes the second issue reported in #170784.
NOTE: Only the second commit should be reviewed, as the first was submitted separately as PR #183794.
TODO: Add a new test and adapt the failing MLIR tests.
>From 1c329510a13a0595fca69192d2b784207f293c31 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Fri, 20 Feb 2026 15:44:02 -0300
Subject: [PATCH 1/2] [flang][OpenMP] Fix lowering of LINEAR iteration
variables
Linear iteration variables were being treated as private. This fixes
one of the issues reported in #170784. The other is that the final
value of a linear iteration variable should be limit_value+1, but
currently it is limit_value.
---
flang/lib/Lower/OpenMP/DataSharingProcessor.cpp | 15 +++++++++------
flang/lib/Lower/OpenMP/OpenMP.cpp | 5 ++++-
flang/test/Lower/OpenMP/composite_simd_linear.f90 | 15 ++++++++-------
.../Lower/OpenMP/distribute-parallel-do-simd.f90 | 13 ++++++-------
flang/test/Lower/OpenMP/distribute-simd.f90 | 6 +++---
flang/test/Lower/OpenMP/loop-pointer-variable.f90 | 10 +++++-----
flang/test/Lower/OpenMP/wsloop-simd.f90 | 10 +++++-----
7 files changed, 40 insertions(+), 34 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index fcf2ae9337295..a2379aa194277 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -235,11 +235,6 @@ void DataSharingProcessor::collectSymbolsForPrivatization() {
// Such cases are suggested to be clearly documented and explained
// instead of being silently skipped
auto isException = [&](const Fortran::semantics::Symbol *sym) -> bool {
- // `OmpPreDetermined` symbols cannot be exceptions since
- // their privatized symbols are heavily used in FIR.
- if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined))
- return false;
-
// The handling of linear clause is deferred to the OpenMP
// IRBuilder which is responsible for all its aspects,
// including privatization. Privatizing linear variables at this point would
@@ -263,6 +258,11 @@ void DataSharingProcessor::collectSymbolsForPrivatization() {
// draw a relation between %linear and %arg0. Hence skip.
if (sym->test(Fortran::semantics::Symbol::Flag::OmpLinear))
return true;
+
+ // `OmpPreDetermined` symbols cannot be exceptions since
+ // their privatized symbols are heavily used in FIR.
+ if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined))
+ return false;
return false;
};
@@ -508,7 +508,10 @@ void DataSharingProcessor::collectSymbols(
!sym.GetUltimate().has<semantics::DerivedTypeDetails>() &&
!sym.GetUltimate().has<semantics::NamelistDetails>() &&
!semantics::IsImpliedDoIndex(sym.GetUltimate()) &&
- !semantics::IsStmtFunction(sym);
+ !semantics::IsStmtFunction(sym) &&
+ // Linear symbols are privatized by OpenMP IRBuilder. See comments
+ // in collectSymbolsForPrivatization() for more details.
+ !sym.test(semantics::Symbol::Flag::OmpLinear);
};
auto shouldCollectSymbol = [&](const semantics::Symbol *sym) {
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 6d93f245228a8..254cd4880c539 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -716,7 +716,10 @@ static mlir::Operation *
createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
mlir::Location loc, mlir::Value indexVal,
const semantics::Symbol *sym) {
- assert(converter.isPresentShallowLookup(*sym) &&
+ // The handling of linear symbols is deferred to the OpenMP IRBuilder,
+ // which is responsible for all its aspects, including privatization.
+ assert((converter.isPresentShallowLookup(*sym) ||
+ sym->test(semantics::Symbol::Flag::OmpLinear)) &&
"Expected symbol to be in symbol table.");
return setLoopVar(converter, loc, indexVal, sym);
}
diff --git a/flang/test/Lower/OpenMP/composite_simd_linear.f90 b/flang/test/Lower/OpenMP/composite_simd_linear.f90
index 706b09d378db4..d976afd6c0f5e 100644
--- a/flang/test/Lower/OpenMP/composite_simd_linear.f90
+++ b/flang/test/Lower/OpenMP/composite_simd_linear.f90
@@ -8,7 +8,7 @@ subroutine do_simd
!CHECK: %{{.*}} = arith.constant 1 : i32
!CHECK: %[[IV_STEP:.*]] = arith.constant 1 : i32
!CHECK: omp.wsloop {
-!CHECK: omp.simd linear(%[[X]]#0 = %[[CONST]] : !fir.ref<i32>, %[[I]]#0 = %[[IV_STEP]] : !fir.ref<i32>) private(@_QFdo_simdEi_private_i32 {{.*}} -> %arg0 : !fir.ref<i32>) {
+!CHECK: omp.simd linear(%[[X]]#0 = %[[CONST]] : !fir.ref<i32>, %[[I]]#0 = %[[IV_STEP]] : !fir.ref<i32>) {
!CHECK: }
!CHECK: } {linear_var_types = [i32, i32], omp.composite}
!CHECK: } {omp.composite}
@@ -21,9 +21,10 @@ end subroutine do_simd
subroutine distribute_simd
+!CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFdistribute_simdEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: omp.teams {
-!CHECK: omp.distribute private(@_QFdistribute_simdEi_private_i32 {{.*}} -> %[[ARG0:.*]] : !fir.ref<i32>) {
-!CHECK: omp.simd linear(%[[ARG0]] = %c1_i32 : !fir.ref<i32>) private(@_QFdistribute_simdEi_private_i32 %[[ARG0]] -> {{.*}} : !fir.ref<i32>) {
+!CHECK: omp.distribute {
+!CHECK: omp.simd linear(%[[I]]#0 = %c1_i32 : !fir.ref<i32>) {
!CHECK: } {linear_var_types = [i32], omp.composite}
!CHECK: } {omp.composite}
integer :: i
@@ -44,7 +45,7 @@ subroutine distribute_parallel_do
!CHECK: %[[CONST]] = arith.constant 1 : i32
!CHECK: omp.distribute {
!CHECK: omp.wsloop {
-!CHECK: omp.simd linear(%[[I]]#0 = %[[CONST]] : !fir.ref<i32>) private(@_QFdistribute_parallel_doEi_private_i32 %[[I]]#0 -> %arg0 : !fir.ref<i32>) {
+!CHECK: omp.simd linear(%[[I]]#0 = %[[CONST]] : !fir.ref<i32>) {
!$omp teams
!$omp distribute parallel do simd linear(i:1)
do i = 1, N
@@ -62,7 +63,7 @@ subroutine parallel_do
!CHECK: %{{.*}} = arith.constant 1 : i32
!CHECK: %[[IV_STEP:.*]] = arith.constant 1 : i32
!CHECK: omp.wsloop {
-!CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_STEP]] : !fir.ref<i32>, %[[I]]#0 = %[[IV_STEP]] : !fir.ref<i32>) private(@_QFparallel_doEi_private_i32 %[[I]]#0 -> %arg0 : !fir.ref<i32>) {
+!CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_STEP]] : !fir.ref<i32>, %[[I]]#0 = %[[IV_STEP]] : !fir.ref<i32>) {
integer :: x
!$omp parallel do simd linear(x:2)
do i = 1, N
@@ -79,7 +80,7 @@ subroutine teams_distribute
!CHECK: {{.*}} = arith.constant 1 : i32
!CHECK: %[[IV_STEP:.*]] = arith.constant 1 : i32
!CHECK: omp.distribute {
-!CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_STEP]] : !fir.ref<i32>, %[[I]]#0 = %[[IV_STEP]] : !fir.ref<i32>) private(@_QFteams_distributeEi_private_i32 %[[I]]#0 -> %arg0 : !fir.ref<i32>) {
+!CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_STEP]] : !fir.ref<i32>, %[[I]]#0 = %[[IV_STEP]] : !fir.ref<i32>) {
integer :: x
!$omp teams distribute simd linear(x)
do i = 1, N
@@ -98,7 +99,7 @@ subroutine teams_distribute_parallel_do
!CHECK: %[[IV_STEP:.*]] = arith.constant 1 : i32
!CHECK: omp.distribute {
!CHECK: omp.wsloop {
-!CHECK: omp.simd linear(%[[X]]#0 = %c1_i32 : !fir.ref<i32>, %[[I]]#0 = %c1_i32_1 : !fir.ref<i32>) private(@_QFteams_distribute_parallel_doEi_private_i32 %[[I]]#0 -> %arg0 : !fir.ref<i32>) {
+!CHECK: omp.simd linear(%[[X]]#0 = %c1_i32 : !fir.ref<i32>, %[[I]]#0 = %c1_i32_1 : !fir.ref<i32>) {
integer :: x
!$omp teams distribute parallel do simd linear(x)
do i = 1, N
diff --git a/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 b/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90
index 120177a0420bf..7f91177cc3311 100644
--- a/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90
+++ b/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90
@@ -11,7 +11,7 @@ subroutine distribute_parallel_do_simd_num_threads()
! CHECK: omp.parallel num_threads({{.*}}) {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop {
- ! CHECK-NEXT: omp.simd linear({{.*}}) private({{.*}}) {
+ ! CHECK-NEXT: omp.simd linear({{.*}}) {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd num_threads(10)
do index_ = 1, 10
@@ -28,7 +28,7 @@ subroutine distribute_parallel_do_simd_dist_schedule()
! CHECK: omp.parallel {
! CHECK: omp.distribute dist_schedule_static dist_schedule_chunk_size({{.*}}) {
! CHECK-NEXT: omp.wsloop {
- ! CHECK-NEXT: omp.simd linear({{.*}}) private({{.*}}) {
+ ! CHECK-NEXT: omp.simd linear({{.*}}) {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd dist_schedule(static, 4)
do index_ = 1, 10
@@ -45,7 +45,7 @@ subroutine distribute_parallel_do_simd_schedule()
! CHECK: omp.parallel {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop schedule(static = {{.*}}) {
- ! CHECK-NEXT: omp.simd linear({{.*}}) private({{.*}}) {
+ ! CHECK-NEXT: omp.simd linear({{.*}}) {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd schedule(static, 4)
do index_ = 1, 10
@@ -62,7 +62,7 @@ subroutine distribute_parallel_do_simd_simdlen()
! CHECK: omp.parallel {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop {
- ! CHECK-NEXT: omp.simd linear({{.*}}) simdlen(4) private({{.*}}) {
+ ! CHECK-NEXT: omp.simd linear({{.*}}) simdlen(4) {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd simdlen(4)
do index_ = 1, 10
@@ -86,11 +86,10 @@ subroutine distribute_parallel_do_simd_private()
! CHECK: omp.parallel {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop {
- ! CHECK-NEXT: omp.simd linear(%{{.*}}) private(@{{.*}} %[[X]]#0 -> %[[X_ARG:[^,]+]],
- ! CHECK-SAME: @{{.*}} %[[INDEX]]#0 -> %[[INDEX_ARG:.*]] : !fir.ref<i64>, !fir.ref<i32>) {
+ ! CHECK-NEXT: omp.simd linear(%{{.*}}) private(@{{.*}} %[[X]]#0 -> %[[X_ARG:[^:]+]]
+ ! CHECK-SAME: : !fir.ref<i64>) {
! CHECK-NEXT: omp.loop_nest
! CHECK: %[[X_PRIV:.*]]:2 = hlfir.declare %[[X_ARG]]
- ! CHECK: %[[INDEX_PRIV:.*]]:2 = hlfir.declare %[[INDEX_ARG]]
!$omp distribute parallel do simd private(x)
do index_ = 1, 10
end do
diff --git a/flang/test/Lower/OpenMP/distribute-simd.f90 b/flang/test/Lower/OpenMP/distribute-simd.f90
index d0316d1a136ab..f06282a10d9f0 100644
--- a/flang/test/Lower/OpenMP/distribute-simd.f90
+++ b/flang/test/Lower/OpenMP/distribute-simd.f90
@@ -61,17 +61,17 @@ end subroutine distribute_simd_simdlen
! CHECK-LABEL: func.func @_QPdistribute_simd_private(
subroutine distribute_simd_private()
integer, allocatable :: tmp
+ ! CHECK: %[[INDEX:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFdistribute_simd_privateEindex_"}
! CHECK: omp.teams
!$omp teams
! CHECK: omp.distribute
! CHECK: omp.simd
- ! CHECK-SAME: private(@[[PRIV_BOX_SYM:.*]] %{{.*}} -> %[[PRIV_BOX:.*]], @[[PRIV_IVAR_SYM:.*]] %{{.*}} -> %[[PRIV_IVAR:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<i32>)
+ ! CHECK-SAME: private(@[[PRIV_BOX_SYM:.*]] %{{.*}} -> %[[PRIV_BOX:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>)
! CHECK-NEXT: omp.loop_nest (%[[IVAR:.*]]) : i32
!$omp distribute simd private(tmp)
do index_ = 1, 10
! CHECK: %[[PRIV_BOX_DECL:.*]]:2 = hlfir.declare %[[PRIV_BOX]]
- ! CHECK: %[[PRIV_IVAR_DECL:.*]]:2 = hlfir.declare %[[PRIV_IVAR]]
- ! CHECK: hlfir.assign %[[IVAR]] to %[[PRIV_IVAR_DECL]]#0
+ ! CHECK: hlfir.assign %[[IVAR]] to %[[INDEX]]#0
end do
!$omp end distribute simd
!$omp end teams
diff --git a/flang/test/Lower/OpenMP/loop-pointer-variable.f90 b/flang/test/Lower/OpenMP/loop-pointer-variable.f90
index 0ca5d3a197dc5..5a2ca05be7ccf 100644
--- a/flang/test/Lower/OpenMP/loop-pointer-variable.f90
+++ b/flang/test/Lower/OpenMP/loop-pointer-variable.f90
@@ -8,6 +8,7 @@ program loop_var
integer, pointer :: ip1, ip2
integer, allocatable :: ia1
+!CHECK: %[[IA1:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEia1"}
!CHECK: omp.wsloop private(@_QFEip1_private_box_ptr_i32 %{{.*}}#0 -> %[[IP1_PVT:.*]], @_QFEip2_private_box_ptr_i32 %{{.*}}#0 -> %[[IP2_PVT:.*]] : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
!CHECK: omp.loop_nest (%[[IP1_INDX:.*]], %[[IP2_INDX:.*]]) : i64 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}})
!CHECK: %[[IP1_PVT_DECL:.*]]:2 = hlfir.declare %[[IP1_PVT]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFEip1"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
@@ -28,13 +29,12 @@ program loop_var
end do
!$omp end do
-!CHECK: omp.simd private(@_QFEia1_private_box_heap_i32 %{{.*}}#0 -> %[[IA1_PVT:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>)
+!CHECK: omp.simd
!CHECK: omp.loop_nest (%[[IA1_INDX:.*]]) : i64 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}})
-!CHECK: %[[IA1_PVT_DECL:.*]]:2 = hlfir.declare %[[IA1_PVT]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEia1"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
-!CHECK: %[[IA1:.*]] = fir.convert %[[IA1_INDX]] : (i64) -> i32
-!CHECK: %[[IA1_BOX:.*]] = fir.load %[[IA1_PVT_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+!CHECK: %[[IA1_INDX_I32:.*]] = fir.convert %[[IA1_INDX]] : (i64) -> i32
+!CHECK: %[[IA1_BOX:.*]] = fir.load %[[IA1]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
!CHECK: %[[IA1_ADDR:.*]] = fir.box_addr %[[IA1_BOX]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
-!CHECK: hlfir.assign %[[IA1]] to %[[IA1_ADDR]] : i32, !fir.heap<i32>
+!CHECK: hlfir.assign %[[IA1_INDX_I32]] to %[[IA1_ADDR]] : i32, !fir.heap<i32>
!CHECK: omp.yield
!$omp simd
do ia1 = 1, 10
diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90
index 03e35de04cace..1b8ac68dad765 100644
--- a/flang/test/Lower/OpenMP/wsloop-simd.f90
+++ b/flang/test/Lower/OpenMP/wsloop-simd.f90
@@ -70,15 +70,15 @@ end subroutine do_simd_reduction
! CHECK-LABEL: func.func @_QPdo_simd_private(
subroutine do_simd_private()
integer, allocatable :: tmp
+ ! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFdo_simd_privateEi"}
! CHECK: omp.wsloop
! CHECK-NEXT: omp.simd
- ! CHECK-SAME: private(@[[PRIV_BOX_SYM:.*]] %{{.*}} -> %[[PRIV_BOX:.*]], @[[PRIV_IVAR_SYM:.*]] %{{.*}} -> %[[PRIV_IVAR:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<i32>)
+ ! CHECK-SAME: private(@[[PRIV_BOX_SYM:.*]] %{{.*}} -> %[[PRIV_BOX:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>)
! CHECK-NEXT: omp.loop_nest (%[[IVAR:.*]]) : i32
!$omp do simd private(tmp)
do i=1, 10
! CHECK: %[[PRIV_BOX_DECL:.*]]:2 = hlfir.declare %[[PRIV_BOX]]
- ! CHECK: %[[PRIV_IVAR_DECL:.*]]:2 = hlfir.declare %[[PRIV_IVAR]]
- ! CHECK: hlfir.assign %[[IVAR]] to %[[PRIV_IVAR_DECL]]#0
+ ! CHECK: hlfir.assign %[[IVAR]] to %[[I_DECL]]#0
! CHECK: %[[PRIV_BOX_LOAD:.*]] = fir.load %[[PRIV_BOX_DECL]]
! CHECK: hlfir.assign %{{.*}} to %[[PRIV_BOX_DECL]]#0
! CHECK: omp.yield
@@ -92,12 +92,12 @@ subroutine do_simd_lastprivate_firstprivate()
! CHECK: omp.wsloop
! CHECK-SAME: private(@[[FIRSTPRIVATE_A_SYM:.*]] %{{.*}} -> %[[FIRSTPRIVATE_A:.*]] : !fir.ref<i32>)
! CHECK-NEXT: omp.simd
- ! CHECK-SAME: private(@[[PRIVATE_A_SYM:.*]] %{{.*}} -> %[[PRIVATE_A:.*]], @[[PRIVATE_I_SYM:.*]] %{{.*}} -> %[[PRIVATE_I:.*]] : !fir.ref<i32>, !fir.ref<i32>)
+ ! CHECK-SAME: linear({{.*}}#0 = %{{[^:]*}} : !fir.ref<i32>)
+ ! CHECK-SAME: private(@[[PRIVATE_A_SYM:.*]] %{{.*}} -> %[[PRIVATE_A:.*]] : !fir.ref<i32>)
!$omp do simd lastprivate(a) firstprivate(a)
do i = 1, 10
! CHECK: %[[FIRSTPRIVATE_A_DECL:.*]]:2 = hlfir.declare %[[FIRSTPRIVATE_A]]
! CHECK: %[[PRIVATE_A_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_A]]
- ! CHECK: %[[PRIVATE_I_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_I]]
a = a + 1
end do
!$omp end do simd
>From dd4c94d022427acff4f20a1ee3af0d2755573008 Mon Sep 17 00:00:00 2001
From: Leandro Lupori <leandro.lupori at linaro.org>
Date: Fri, 27 Feb 2026 15:32:58 -0300
Subject: [PATCH 2/2] [mlir][OpenMP] Fix update of linear iteration variables
The final value of a linear iteration variable must be the loop
limit_value + 1. Before this patch it was limit_value.
This fixes the second issue reported in #170784.
TODO add a new test and adapt the failing MLIR tests.
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 129 ++++++++++++------
1 file changed, 91 insertions(+), 38 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 38c5802ed60ed..f223649b25a85 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -143,6 +143,37 @@ class LinearClauseProcessor {
llvm::BasicBlock *linearFinalizationBB;
llvm::BasicBlock *linearExitBB;
llvm::BasicBlock *linearLastIterExitBB;
+ Value linearLoopIV;
+ Value linearLoopIVStart;
+
+ void updateLinearVar(llvm::IRBuilderBase &builder, llvm::Type *varType,
+ llvm::Value *var, llvm::Value *varStart,
+ llvm::Value *step, llvm::Value *iv) {
+ if (!iv->getType()->isIntegerTy())
+ llvm_unreachable("OpenMP loop induction variable must be an integer "
+ "type");
+
+ if (varType->isIntegerTy()) {
+ // Integer path: normalize all arithmetic to linearVarType
+ iv = builder.CreateSExtOrTrunc(iv, varType);
+ step = builder.CreateSExtOrTrunc(step, varType);
+
+ llvm::Value *mulInst = builder.CreateMul(iv, step);
+ llvm::Value *addInst = builder.CreateAdd(varStart, mulInst);
+ builder.CreateStore(addInst, var);
+ } else if (varType->isFloatingPointTy()) {
+ // Float path: perform multiply in integer, then convert to float
+ step = builder.CreateSExtOrTrunc(step, iv->getType());
+
+ llvm::Value *mulInst = builder.CreateMul(iv, step);
+ llvm::Value *mulFp = builder.CreateSIToFP(mulInst, varType);
+ llvm::Value *addInst = builder.CreateFAdd(varStart, mulFp);
+ builder.CreateStore(addInst, var);
+ } else {
+ llvm_unreachable(
+ "Linear variable must be of integer or floating-point type");
+ }
+ }
public:
// Register type for the linear variables
@@ -182,46 +213,63 @@ class LinearClauseProcessor {
}
}
+ // Find linear iteration variable and save it for later updates
+ void initLinearIV(omp::SimdOp simdOp) {
+ auto loopOp = cast<omp::LoopNestOp>(simdOp.getWrappedLoop());
+ // NOTE iteration variables can only be linear in non-nested loops.
+ if (loopOp.getIVs().size() != 1)
+ return;
+ // The linear IV is the loop IV's store address.
+ BlockArgument arg = loopOp.getIVs().front();
+ for (const Operation *user : arg.getUsers()) {
+ if (auto storeOp = dyn_cast<LLVM::StoreOp>(user)) {
+ for (Value linearVar : simdOp.getLinearVars()) {
+ if (linearVar == storeOp.getAddr()) {
+ linearLoopIV = linearVar;
+ linearLoopIVStart = loopOp.getLoopLowerBounds().front();
+ break;
+ }
+ }
+ }
+ }
+ }
+
// Emit IR for updating Linear variables
- void updateLinearVar(llvm::IRBuilderBase &builder, llvm::BasicBlock *loopBody,
- llvm::Value *loopInductionVar) {
+ void updateLinearVars(llvm::IRBuilderBase &builder,
+ llvm::BasicBlock *loopBody,
+ llvm::Value *loopInductionVar) {
builder.SetInsertPoint(loopBody->getTerminator());
for (size_t index = 0; index < linearPreconditionVars.size(); index++) {
- llvm::Type *linearVarType = linearVarTypes[index];
- llvm::Value *iv = loopInductionVar;
- llvm::Value *step = linearSteps[index];
-
- if (!iv->getType()->isIntegerTy())
- llvm_unreachable("OpenMP loop induction variable must be an integer "
- "type");
-
- if (linearVarType->isIntegerTy()) {
- // Integer path: normalize all arithmetic to linearVarType
- iv = builder.CreateSExtOrTrunc(iv, linearVarType);
- step = builder.CreateSExtOrTrunc(step, linearVarType);
-
- llvm::LoadInst *linearVarStart =
- builder.CreateLoad(linearVarType, linearPreconditionVars[index]);
- llvm::Value *mulInst = builder.CreateMul(iv, step);
- llvm::Value *addInst = builder.CreateAdd(linearVarStart, mulInst);
- builder.CreateStore(addInst, linearLoopBodyTemps[index]);
- } else if (linearVarType->isFloatingPointTy()) {
- // Float path: perform multiply in integer, then convert to float
- step = builder.CreateSExtOrTrunc(step, iv->getType());
- llvm::Value *mulInst = builder.CreateMul(iv, step);
-
- llvm::LoadInst *linearVarStart =
- builder.CreateLoad(linearVarType, linearPreconditionVars[index]);
- llvm::Value *mulFp = builder.CreateSIToFP(mulInst, linearVarType);
- llvm::Value *addInst = builder.CreateFAdd(linearVarStart, mulFp);
- builder.CreateStore(addInst, linearLoopBodyTemps[index]);
- } else {
- llvm_unreachable(
- "Linear variable must be of integer or floating-point type");
- }
+ llvm::LoadInst *linearVarStart = builder.CreateLoad(
+ linearVarTypes[index], linearPreconditionVars[index]);
+ updateLinearVar(builder, linearVarTypes[index],
+ linearLoopBodyTemps[index], linearVarStart,
+ linearSteps[index], loopInductionVar);
}
}
+ // Emit IR for updating linear iteration variables on loop exit
+ void updateLinearIV(llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::Value *loopIV) {
+ if (!linearLoopIV)
+ return;
+ llvm::Value *linearIV = moduleTranslation.lookupValue(linearLoopIV);
+ llvm::Value *linearIVStart =
+ moduleTranslation.lookupValue(linearLoopIVStart);
+
+ // Find linearIV's index
+ size_t index;
+ for (index = 0; index < linearOrigVal.size(); index++)
+ if (linearIV == linearOrigVal[index])
+ break;
+ if (index == linearOrigVal.size())
+ return;
+
+ updateLinearVar(builder, linearVarTypes[index], linearLoopBodyTemps[index],
+ linearIVStart, linearSteps[index], loopIV);
+ }
+
// Linear variable finalization is conditional on the last logical iteration.
// Create BB splits to manage the same.
void splitLinearFiniBB(llvm::IRBuilderBase &builder,
@@ -3103,8 +3151,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(handleError(afterBarrierIP, *loopOp)))
return failure();
builder.restoreIP(*afterBarrierIP);
- linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
- loopInfo->getIndVar());
+ linearClauseProcessor.updateLinearVars(builder, loopInfo->getBody(),
+ loopInfo->getIndVar());
linearClauseProcessor.splitLinearFiniBB(builder, loopInfo->getExit());
}
@@ -3406,6 +3454,8 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
// Initialize linear variables and linear step
LinearClauseProcessor linearClauseProcessor;
+ linearClauseProcessor.initLinearIV(simdOp);
+
if (!simdOp.getLinearVars().empty()) {
auto linearVarTypes = simdOp.getLinearVarTypes().value();
for (mlir::Attribute linearVarType : linearVarTypes)
@@ -3503,8 +3553,8 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
linearClauseProcessor.initLinearVar(builder, moduleTranslation,
loopInfo->getPreheader());
- linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
- loopInfo->getIndVar());
+ linearClauseProcessor.updateLinearVars(builder, loopInfo->getBody(),
+ loopInfo->getIndVar());
}
builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
@@ -3514,6 +3564,9 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
: nullptr,
order, simdlen, safelen);
+ linearClauseProcessor.updateLinearIV(builder, moduleTranslation,
+ loopInfo->getIndVar());
+
linearClauseProcessor.emitStoresForLinearVar(builder);
for (size_t index = 0; index < simdOp.getLinearVars().size(); index++)
linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
More information about the flang-commits
mailing list