[Mlir-commits] [mlir] 11d9694 - [flang][mlir] Add support for implicit linearization in omp.simd (#150386)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sat Jan 3 21:37:48 PST 2026
Author: NimishMishra
Date: 2026-01-03T21:37:43-08:00
New Revision: 11d9694b757b2e2c9f5169967fcc85f25f9a5645
URL: https://github.com/llvm/llvm-project/commit/11d9694b757b2e2c9f5169967fcc85f25f9a5645
DIFF: https://github.com/llvm/llvm-project/commit/11d9694b757b2e2c9f5169967fcc85f25f9a5645.diff
LOG: [flang][mlir] Add support for implicit linearization in omp.simd (#150386)
Up to and including OpenMP version 4.5, the loop iteration variable in the
associated do-construct of simd is linear with a linear step equal to
the increment of the loop. This PR implements this functionality. For
versions later than 4.5, such an implicit linear clause is not assumed for the
loop iteration variable.
Fixes https://github.com/llvm/llvm-project/issues/171006
Added:
Modified:
flang/docs/OpenMPSupport.md
flang/lib/Lower/OpenMP/OpenMP.cpp
flang/lib/Semantics/resolve-directives.cpp
flang/test/Lower/OpenMP/parallel-private-clause.f90
flang/test/Lower/OpenMP/simd-linear.f90
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Removed:
################################################################################
diff --git a/flang/docs/OpenMPSupport.md b/flang/docs/OpenMPSupport.md
index 6ef0f2a581771..c76cafd1b3a5f 100644
--- a/flang/docs/OpenMPSupport.md
+++ b/flang/docs/OpenMPSupport.md
@@ -34,7 +34,7 @@ Note : No distinction is made between the support in Parser/Semantics, MLIR, Low
| Feature | Status | Comments |
|------------------------------------------------------------|--------|---------------------------------------------------------|
| proc_bind clause | Y | |
-| simd construct | P | linear clause is not supported |
+| simd construct | P | Implicit linearization is skipped if the loop iteration variable is a pointer or allocatable |
| declare simd construct | N | |
| do simd construct | P | linear clause is not supported |
| target data construct | P | device clause not supported |
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 7965119764e5d..090d608503f26 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3025,9 +3025,44 @@ genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
simdArgs.priv.vars = simdClauseOps.privateVars;
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
+
+ std::vector<mlir::Attribute> typeAttrs;
+ // If attributes from explicit `linear(...)` clause are present,
+ // carry them forward.
+ if (simdClauseOps.linearVarTypes && !simdClauseOps.linearVarTypes.empty())
+ typeAttrs.assign(simdClauseOps.linearVarTypes.begin(),
+ simdClauseOps.linearVarTypes.end());
+
+ for (auto [loopVar, loopStep] : llvm::zip(iv, loopNestClauseOps.loopSteps)) {
+ const mlir::Value variable = converter.getSymbolAddress(*loopVar);
+
+ // If the loop variable is already linearized (through an explicit
+ // `linear()` clause), skip.
+ if (std::find(simdClauseOps.linearVars.begin(),
+ simdClauseOps.linearVars.end(),
+ variable) != simdClauseOps.linearVars.end())
+ continue;
+
+ // TODO: Implicit linearization is skipped if iv is a pointer
+ // or an allocatable, due to potential mismatch between the linear
+ // variable type (example !fir.ref<!fir.box<!fir.heap<i32>>>)
+ // and the linear step size (example: i64). Handle this type mismatch
+ // gracefully.
+ if (loopVar->test(Fortran::semantics::Symbol::Flag::OmpLinear) &&
+ !(Fortran::semantics::IsAllocatableOrPointer(*loopVar) ||
+ Fortran::semantics::IsAllocatableOrPointer(loopVar->GetUltimate()))) {
+ mlir::Type ty = converter.genType(*loopVar);
+ typeAttrs.push_back(mlir::TypeAttr::get(ty));
+ simdClauseOps.linearVars.push_back(variable);
+ simdClauseOps.linearStepVars.push_back(loopStep);
+ }
+ }
+ if (!typeAttrs.empty())
+ simdClauseOps.linearVarTypes =
+ mlir::ArrayAttr::get(&converter.getMLIRContext(), typeAttrs);
+
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
-
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{simdOp, simdArgs}},
llvm::omp::Directive::OMPD_simd, dsp);
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index fcad3bee415fd..6467abf872c16 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2351,7 +2351,7 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
// parallel do, taskloop, or distribute construct is (are) private.
// - The loop iteration variable in the associated do-loop of a simd construct
// with just one associated do-loop is linear with a linear-step that is the
-// increment of the associated do-loop.
+// increment of the associated do-loop (only for OpenMP versions <= 4.5).
// - The loop iteration variables in the associated do-loops of a simd
// construct with multiple associated do-loops are lastprivate.
void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
@@ -2361,9 +2361,10 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
return;
}
Symbol::Flag ivDSA;
+ unsigned version{context_.langOptions().OpenMPVersion};
if (!llvm::omp::allSimdSet.test(GetContext().directive)) {
ivDSA = Symbol::Flag::OmpPrivate;
- } else if (level == 1) {
+ } else if (level == 1 && version <= 45) {
ivDSA = Symbol::Flag::OmpLinear;
} else {
ivDSA = Symbol::Flag::OmpLastPrivate;
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90
index 3a7fc22c0289b..a198ca8d09867 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90
@@ -349,7 +349,7 @@ subroutine simd_loop_1
! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32
! FIRDialect: %[[STEP:.*]] = arith.constant 1 : i32
- ! FIRDialect: omp.simd private({{.*}}) {
+ ! FIRDialect: omp.simd linear({{.*}} = %[[STEP]] : !fir.ref<i32>) private({{.*}}) {
! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
!$OMP SIMD PRIVATE(r)
do i=1, 9
diff --git a/flang/test/Lower/OpenMP/simd-linear.f90 b/flang/test/Lower/OpenMP/simd-linear.f90
index b6c7668af998b..6a50f1da18489 100644
--- a/flang/test/Lower/OpenMP/simd-linear.f90
+++ b/flang/test/Lower/OpenMP/simd-linear.f90
@@ -1,57 +1,82 @@
! This test checks lowering of OpenMP SIMD Directive
! with linear clause
-! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -emit-hlfir -fopenmp-version=50 %s -o - 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -emit-hlfir -fopenmp-version=45 %s -o - 2>&1 | FileCheck %s --check-prefix=IMPLICIT
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[const:.*]] = arith.constant 1 : i32
+
+!IMPLICIT: %[[I_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_linearEi"}
+!IMPLICIT: %[[I:.*]]:2 = hlfir.declare %[[I_ALLOCA]] {{.*}}
+!IMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
+!IMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!IMPLICIT: %[[const:.*]] = arith.constant 1 : i32
subroutine simple_linear
implicit none
integer :: x, y, i
!CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+
+ !IMPLICIT: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>, %[[I]]#0 = %{{.*}} : !fir.ref<i32>) {{.*}}
!$omp simd linear(x)
- !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
- !CHECK: %[[const:.*]] = arith.constant 2 : i32
- !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
do i = 1, 10
- y = x + 2
end do
!CHECK: } {linear_var_types = [i32]}
+ !IMPLICIT: } {linear_var_types = [i32, i32]}
end subroutine
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!IMPLICIT: %[[I_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlinear_stepEi"}
+!IMPLICIT: %[[I:.*]]:2 = hlfir.declare %[[I_ALLOCA]] {{.*}}
+!IMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
+!IMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!IMPLICIT: %[[const:.*]] = arith.constant 4 : i32
subroutine linear_step
implicit none
integer :: x, y, i
!CHECK: %[[const:.*]] = arith.constant 4 : i32
!CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+
+ !IMPLICIT: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>, %[[I]]#0 = %{{.*}} : !fir.ref<i32>) {{.*}}
!$omp simd linear(x:4)
- !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
- !CHECK: %[[const:.*]] = arith.constant 2 : i32
- !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
do i = 1, 10
- y = x + 2
end do
!CHECK: } {linear_var_types = [i32]}
+ !IMPLICIT: } {linear_var_types = [i32, i32]}
end subroutine
!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!IMPLICIT: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
+!IMPLICIT: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!IMPLICIT: %[[I_ALLOCA:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlinear_exprEi"}
+!IMPLICIT: %[[I:.*]]:2 = hlfir.declare %[[I_ALLOCA]] {uniq_name = "_QFlinear_exprEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!IMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
+!IMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
subroutine linear_expr
implicit none
integer :: x, y, i, a
!CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 4 : i32
!CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
+
+ !IMPLICIT: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+ !IMPLICIT: %[[const:.*]] = arith.constant 4 : i32
+ !IMPLICIT: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
+
!CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
+
+ !IMPLICIT: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>, %[[I]]#0 = {{.*}} : !fir.ref<i32>) {{.*}}
!$omp simd linear(x:a+4)
do i = 1, 10
- y = x + 2
end do
!CHECK: } {linear_var_types = [i32]}
+ !IMPLICIT: } {linear_var_types = [i32, i32]}
end subroutine
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 66c596a3c739a..c37af8d7b1673 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -155,12 +155,12 @@ class LinearClauseProcessor {
// Allocate space for linear variabes
void createLinearVar(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
- mlir::Value &linearVar, int idx) {
+ llvm::Value *linearVar, int idx) {
linearPreconditionVars.push_back(
builder.CreateAlloca(linearVarTypes[idx], nullptr, ".linear_var"));
llvm::Value *linearLoopBodyTemp =
builder.CreateAlloca(linearVarTypes[idx], nullptr, ".linear_result");
- linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
+ linearOrigVal.push_back(linearVar);
linearLoopBodyTemps.push_back(linearLoopBodyTemp);
}
@@ -267,6 +267,16 @@ class LinearClauseProcessor {
builder.saveIP(), llvm::omp::OMPD_barrier);
}
+ // Emit stores for linear variables. Useful in case of SIMD
+ // construct.
+ void emitStoresForLinearVar(llvm::IRBuilderBase &builder) {
+ for (size_t index = 0; index < linearOrigVal.size(); index++) {
+ llvm::LoadInst *linearVarTemp =
+ builder.CreateLoad(linearVarTypes[index], linearLoopBodyTemps[index]);
+ builder.CreateStore(linearVarTemp, linearOrigVal[index]);
+ }
+ }
+
// Rewrite all uses of the original variable in `BBName`
// with the linear variable in-place
void rewriteInPlace(llvm::IRBuilderBase &builder, const std::string &BBName,
@@ -2643,8 +2653,9 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
linearClauseProcessor.registerType(moduleTranslation, linearVarType);
for (auto [idx, linearVar] : llvm::enumerate(wsloopOp.getLinearVars()))
- linearClauseProcessor.createLinearVar(builder, moduleTranslation,
- linearVar, idx);
+ linearClauseProcessor.createLinearVar(
+ builder, moduleTranslation, moduleTranslation.lookupValue(linearVar),
+ idx);
for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
}
@@ -2962,6 +2973,11 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+ builder, moduleTranslation, privateVarsInfo, allocaIP);
+ if (handleError(afterAllocas, opInst).failed())
+ return failure();
+
// Initialize linear variables and linear step
LinearClauseProcessor linearClauseProcessor;
@@ -2969,18 +2985,29 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
auto linearVarTypes = simdOp.getLinearVarTypes().value();
for (mlir::Attribute linearVarType : linearVarTypes)
linearClauseProcessor.registerType(moduleTranslation, linearVarType);
- for (auto [idx, linearVar] : llvm::enumerate(simdOp.getLinearVars()))
- linearClauseProcessor.createLinearVar(builder, moduleTranslation,
- linearVar, idx);
+ for (auto [idx, linearVar] : llvm::enumerate(simdOp.getLinearVars())) {
+ bool isImplicit = false;
+ for (auto [mlirPrivVar, llvmPrivateVar] : llvm::zip_equal(
+ privateVarsInfo.mlirVars, privateVarsInfo.llvmVars)) {
+ // If the linear variable is implicit, reuse the already
+ // existing llvm::Value
+ if (linearVar == mlirPrivVar) {
+ isImplicit = true;
+ linearClauseProcessor.createLinearVar(builder, moduleTranslation,
+ llvmPrivateVar, idx);
+ break;
+ }
+ }
+
+ if (!isImplicit)
+ linearClauseProcessor.createLinearVar(
+ builder, moduleTranslation,
+ moduleTranslation.lookupValue(linearVar), idx);
+ }
for (mlir::Value linearStep : simdOp.getLinearStepVars())
linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
}
- llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
- builder, moduleTranslation, privateVarsInfo, allocaIP);
- if (handleError(afterAllocas, opInst).failed())
- return failure();
-
if (failed(allocReductionVars(simdOp, reductionArgs, builder,
moduleTranslation, allocaIP, reductionDecls,
privateReductionVariables, reductionVariableMap,
@@ -3062,6 +3089,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
: nullptr,
order, simdlen, safelen);
+ linearClauseProcessor.emitStoresForLinearVar(builder);
for (size_t index = 0; index < simdOp.getLinearVars().size(); index++)
linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
index);
More information about the Mlir-commits
mailing list