[Mlir-commits] [flang] [mlir] [flang][mlir] Add support for implicit linearization in omp.simd (PR #150386)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Jul 24 01:06:18 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-semantics
Author: None (NimishMishra)
<details>
<summary>Changes</summary>
Up to and including OpenMP version 4.5, the loop iteration variable of the do-loop associated with a simd construct is implicitly linear, with a linear-step equal to the increment of the loop. This PR implements that implicit linearization. For OpenMP versions later than 4.5, no such implicit linear clause is assumed for the loop iteration variable.
---
Patch is 32.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150386.diff
10 Files Affected:
- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+12-5)
- (modified) flang/lib/Semantics/resolve-directives.cpp (+3-2)
- (removed) flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 (-14)
- (modified) flang/test/Lower/OpenMP/parallel-private-clause.f90 (+1-1)
- (added) flang/test/Lower/OpenMP/simd-linear.f90 (+86)
- (added) flang/test/Lower/OpenMP/wsloop-linear.f90 (+57)
- (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+1-1)
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+99-37)
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+133)
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (+1-13)
``````````diff
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fcb20fdf187ff..44dbbd52fa209 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1731,8 +1731,7 @@ static void genSimdClauses(
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processSafelen(clauseOps);
cp.processSimdlen(clauseOps);
-
- cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd);
+ cp.processLinear(clauseOps);
}
static void genSingleClauses(lower::AbstractConverter &converter,
@@ -1922,9 +1921,9 @@ static void genWsloopClauses(
cp.processOrdered(clauseOps);
cp.processReduction(loc, clauseOps, reductionSyms);
cp.processSchedule(stmtCtx, clauseOps);
+ cp.processLinear(clauseOps);
- cp.processTODO<clause::Allocate, clause::Linear>(
- loc, llvm::omp::Directive::OMPD_do);
+ cp.processTODO<clause::Allocate>(loc, llvm::omp::Directive::OMPD_do);
}
//===----------------------------------------------------------------------===//
@@ -2761,9 +2760,17 @@ genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
simdArgs.priv.vars = simdClauseOps.privateVars;
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
+
+ for (auto &sym : simdArgs.priv.syms) {
+ if (sym->test(Fortran::semantics::Symbol::Flag::OmpLinear)) {
+ const mlir::Value variable = converter.getSymbolAddress(*sym);
+ simdClauseOps.linearVars.push_back(variable);
+ simdClauseOps.linearStepVars.push_back(loopNestClauseOps.loopSteps[0]);
+ }
+ }
+
auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
-
genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{simdOp, simdArgs}},
llvm::omp::Directive::OMPD_simd, dsp);
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 521c7432d9fbb..29cf41668fb03 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1979,7 +1979,7 @@ std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses(
// parallel do, taskloop, or distribute construct is (are) private.
// - The loop iteration variable in the associated do-loop of a simd construct
// with just one associated do-loop is linear with a linear-step that is the
-// increment of the associated do-loop.
+// increment of the associated do-loop (only for OpenMP versions <= 4.5)
// - The loop iteration variables in the associated do-loops of a simd
// construct with multiple associated do-loops are lastprivate.
void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
@@ -1993,7 +1993,8 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
if (!llvm::omp::allSimdSet.test(GetContext().directive)) {
ivDSA = Symbol::Flag::OmpPrivate;
} else if (level == 1) {
- ivDSA = Symbol::Flag::OmpLinear;
+ if (version <= 45)
+ ivDSA = Symbol::Flag::OmpLinear;
} else {
ivDSA = Symbol::Flag::OmpLastPrivate;
}
diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
deleted file mode 100644
index 4caf12a0169c4..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
+++ /dev/null
@@ -1,14 +0,0 @@
-! This test checks lowering of OpenMP do simd linear() pragma
-
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-subroutine testDoSimdLinear(int_array)
- integer :: int_array(*)
-!CHECK: not yet implemented: Unhandled clause LINEAR in SIMD construct
-!$omp do simd linear(int_array)
- do index_ = 1, 10
- end do
-!$omp end do simd
-
-end subroutine testDoSimdLinear
-
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90
index 492fb3bb9740d..c3f2667f77d3e 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90
@@ -350,7 +350,7 @@ subroutine simd_loop_1
! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32
! FIRDialect: %[[STEP:.*]] = arith.constant 1 : i32
- ! FIRDialect: omp.simd private({{.*}}) {
+ ! FIRDialect: omp.simd linear({{.*}} = %[[STEP]] : !fir.ref<i32>) private({{.*}}) {
! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
!$OMP SIMD PRIVATE(r)
do i=1, 9
diff --git a/flang/test/Lower/OpenMP/simd-linear.f90 b/flang/test/Lower/OpenMP/simd-linear.f90
new file mode 100644
index 0000000000000..e026e28c42517
--- /dev/null
+++ b/flang/test/Lower/OpenMP/simd-linear.f90
@@ -0,0 +1,86 @@
+! This test checks lowering of OpenMP SIMD with linear clause
+
+! RUN: %flang_fc1 -fopenmp -emit-hlfir -fopenmp-version=45 %s -o - 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -emit-hlfir -fopenmp-version=50 %s -o - 2>&1 | FileCheck %s --check-prefix=NOIMPLICIT
+
+!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_linearEi"}
+!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFsimple_linearEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[const:.*]] = arith.constant 1 : i32
+!CHECK: %{{.*}} = arith.constant 1 : i32
+!CHECK: %[[IV_step:.*]] = arith.constant 1 : i32
+
+!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
+!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!NOIMPLICIT: %[[const:.*]] = arith.constant 1 : i32
+subroutine simple_linear
+ implicit none
+ integer :: x, y, i
+ !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}
+ !NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+ !$omp simd linear(x)
+ !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
+ !CHECK: %[[const:.*]] = arith.constant 2 : i32
+ !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
+ do i = 1, 10
+ y = x + 2
+ end do
+end subroutine
+
+!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlinear_stepEi"}
+!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFlinear_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
+!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!NOIMPLICIT: %[[const:.*]] = arith.constant 4 : i32
+subroutine linear_step
+ implicit none
+ integer :: x, y, i
+ !CHECK: %[[const:.*]] = arith.constant 4 : i32
+ !CHECK: %{{.*}} = arith.constant 1 : i32
+ !CHECK: %[[IV_step:.*]] = arith.constant 1 : i32
+ !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}
+
+ !NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+ !$omp simd linear(x:4)
+ !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
+ !CHECK: %[[const:.*]] = arith.constant 2 : i32
+ !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
+ do i = 1, 10
+ y = x + 2
+ end do
+end subroutine
+
+!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
+!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlinear_exprEi"}
+!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFlinear_exprEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!NOIMPLICIT: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
+!NOIMPLICIT: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
+!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!NOIMPLICIT: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+!NOIMPLICIT: %[[const:.*]] = arith.constant 4 : i32
+!NOIMPLICIT: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
+subroutine linear_expr
+ implicit none
+ integer :: x, y, i, a
+ !CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+ !CHECK: %[[const:.*]] = arith.constant 4 : i32
+ !CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
+ !CHECK: %{{.*}} = arith.constant 1 : i32
+ !CHECK: %[[IV_step:.*]] = arith.constant 1 : i32
+ !CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}
+
+ !NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
+ !$omp simd linear(x:a+4)
+ do i = 1, 10
+ y = x + 2
+ end do
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-linear.f90 b/flang/test/Lower/OpenMP/wsloop-linear.f90
new file mode 100644
index 0000000000000..b99677108be2f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-linear.f90
@@ -0,0 +1,57 @@
+! This test checks lowering of OpenMP DO Directive (Worksharing)
+! with linear clause
+
+! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
+
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[const:.*]] = arith.constant 1 : i32
+subroutine simple_linear
+ implicit none
+ integer :: x, y, i
+ !CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+ !$omp do linear(x)
+ !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
+ !CHECK: %[[const:.*]] = arith.constant 2 : i32
+ !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
+ do i = 1, 10
+ y = x + 2
+ end do
+ !$omp end do
+end subroutine
+
+
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+subroutine linear_step
+ implicit none
+ integer :: x, y, i
+ !CHECK: %[[const:.*]] = arith.constant 4 : i32
+ !CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+ !$omp do linear(x:4)
+ !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
+ !CHECK: %[[const:.*]] = arith.constant 2 : i32
+ !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
+ do i = 1, 10
+ y = x + 2
+ end do
+ !$omp end do
+end subroutine
+
+!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
+!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+subroutine linear_expr
+ implicit none
+ integer :: x, y, i, a
+ !CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+ !CHECK: %[[const:.*]] = arith.constant 4 : i32
+ !CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
+ !CHECK: omp.wsloop linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
+ !$omp do linear(x:a+4)
+ do i = 1, 10
+ y = x + 2
+ end do
+ !$omp end do
+end subroutine
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 769aee64e1695..dd5d8ac5ff31d 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2610,7 +2610,7 @@ void SimdOp::build(OpBuilder &builder, OperationState &state,
// TODO Store clauses in op: linearVars, linearStepVars
SimdOp::build(builder, state, clauses.alignedVars,
makeArrayAttr(ctx, clauses.alignments), clauses.ifExpr,
- /*linear_vars=*/{}, /*linear_step_vars=*/{},
+ clauses.linearVars, clauses.linearStepVars,
clauses.nontemporalVars, clauses.order, clauses.orderMod,
clauses.privateVars, makeArrayAttr(ctx, clauses.privateSyms),
clauses.privateNeedsBarrier, clauses.reductionMod,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 3185f28fe6681..159a22725b1c6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -147,9 +147,30 @@ class LinearClauseProcessor {
public:
// Allocate space for linear variabes
- void createLinearVar(llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- mlir::Value &linearVar) {
+ LogicalResult createLinearVar(llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::Value *linearVar, Operation &op) {
+ if (llvm::AllocaInst *linearVarAlloca =
+ dyn_cast<llvm::AllocaInst>(linearVar)) {
+ linearPreconditionVars.push_back(builder.CreateAlloca(
+ linearVarAlloca->getAllocatedType(), nullptr, ".linear_var"));
+ llvm::Value *linearLoopBodyTemp = builder.CreateAlloca(
+ linearVarAlloca->getAllocatedType(), nullptr, ".linear_result");
+ linearOrigVal.push_back(linearVar);
+ linearLoopBodyTemps.push_back(linearLoopBodyTemp);
+ linearOrigVars.push_back(linearVarAlloca);
+ return success();
+ }
+
+ else
+ return op.emitError() << "not yet implemented: linear clause support"
+ << " for non alloca linear variables";
+ }
+
+ // Allocate space for linear variabes
+ LogicalResult createLinearVar(llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ mlir::Value &linearVar, Operation &op) {
if (llvm::AllocaInst *linearVarAlloca = dyn_cast<llvm::AllocaInst>(
moduleTranslation.lookupValue(linearVar))) {
linearPreconditionVars.push_back(builder.CreateAlloca(
@@ -159,7 +180,12 @@ class LinearClauseProcessor {
linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
linearLoopBodyTemps.push_back(linearLoopBodyTemp);
linearOrigVars.push_back(linearVarAlloca);
+ return success();
}
+
+ else
+ return op.emitError() << "not yet implemented: linear clause support"
+ << " for non alloca linear variables";
}
// Initialize linear step
@@ -169,20 +195,15 @@ class LinearClauseProcessor {
}
// Emit IR for initialization of linear variables
- llvm::OpenMPIRBuilder::InsertPointOrErrorTy
- initLinearVar(llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::BasicBlock *loopPreHeader) {
+ void initLinearVar(llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::BasicBlock *loopPreHeader) {
builder.SetInsertPoint(loopPreHeader->getTerminator());
for (size_t index = 0; index < linearOrigVars.size(); index++) {
llvm::LoadInst *linearVarLoad = builder.CreateLoad(
linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]);
builder.CreateStore(linearVarLoad, linearPreconditionVars[index]);
}
- llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
- moduleTranslation.getOpenMPBuilder()->createBarrier(
- builder.saveIP(), llvm::omp::OMPD_barrier);
- return afterBarrierIP;
}
// Emit IR for updating Linear variables
@@ -193,18 +214,27 @@ class LinearClauseProcessor {
// Emit increments for linear vars
llvm::LoadInst *linearVarStart =
builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
-
linearPreconditionVars[index]);
+
auto mulInst = builder.CreateMul(loopInductionVar, linearSteps[index]);
- auto addInst = builder.CreateAdd(linearVarStart, mulInst);
- builder.CreateStore(addInst, linearLoopBodyTemps[index]);
+ if (linearOrigVars[index]->getAllocatedType()->isIntegerTy()) {
+ auto addInst = builder.CreateAdd(linearVarStart, mulInst);
+ builder.CreateStore(addInst, linearLoopBodyTemps[index]);
+ } else if (linearOrigVars[index]
+ ->getAllocatedType()
+ ->isFloatingPointTy()) {
+ auto cvt = builder.CreateSIToFP(
+ mulInst, linearOrigVars[index]->getAllocatedType());
+ auto addInst = builder.CreateFAdd(linearVarStart, cvt);
+ builder.CreateStore(addInst, linearLoopBodyTemps[index]);
+ }
}
}
// Linear variable finalization is conditional on the last logical iteration.
// Create BB splits to manage the same.
- void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder,
- llvm::BasicBlock *loopExit) {
+ void splitLinearFiniBB(llvm::IRBuilderBase &builder,
+ llvm::BasicBlock *loopExit) {
linearFinalizationBB = loopExit->splitBasicBlock(
loopExit->getTerminator(), "omp_loop.linear_finalization");
linearExitBB = linearFinalizationBB->splitBasicBlock(
@@ -256,7 +286,8 @@ class LinearClauseProcessor {
users.push_back(user);
for (auto *user : users) {
if (auto *userInst = dyn_cast<llvm::Instruction>(user)) {
- if (userInst->getParent()->getName().str() == BBName)
+ if (userInst->getParent()->getName().str().find(BBName) !=
+ std::string::npos)
user->replaceUsesOfWith(linearOrigVal[varIndex],
linearLoopBodyTemps[varIndex]);
}
@@ -339,10 +370,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (!op.getIsDevicePtrVars().empty())
result = todo("is_device_ptr");
};
- auto checkLinear = [&todo](auto op, LogicalResult &result) {
- if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
- result = todo("linear");
- };
auto checkNowait = [&todo](auto op, LogicalResult &result) {
if (op.getNowait())
result = todo("nowait");
@@ -432,7 +459,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
})
.Case([&](omp::WsloopOp op) {
checkAllocate(op, result);
- checkLinear(op, result);
checkOrder(op, result);
checkReduction(op, result);
})
@@ -440,10 +466,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkAllocate(op, result);
checkReduction(op, result);
})
- .Case([&](omp::SimdOp op) {
- checkLinear(op, result);
- checkReduction(op, result);
- })
+ .Case([&](omp::SimdOp op)...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150386
More information about the Mlir-commits
mailing list