[Mlir-commits] [flang] [mlir] [flang][mlir] Add support for implicit linearization in omp.simd (PR #150386)

Thu Jul 24 01:05:48 PDT 2025

https://github.com/NimishMishra created https://github.com/llvm/llvm-project/pull/150386

Up till OpenMP version 4.5, the loop iteration variable in the associated do-construct of simd is linear with a linear step equal to the increment of the loop. This PR implements this functionality. For versions > 4.5, such an implicit linear clause is not assumed for the loop iteration variable.

>From ab2e855f8fc3aac2b1792758b921f3601f5f5139 Mon Sep 17 00:00:00 2001
From: Nimish Mishra <neelam.nimish at gmail.com>
Date: Wed, 9 Jul 2025 14:22:08 +0530
Subject: [PATCH 1/2] [flang][OpenMP] Support MLIR lowering of linear clause
 for omp.wsloop

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             |   7 +-
 .../Lower/OpenMP/Todo/omp-do-simd-linear.f90  |  14 --
 flang/test/Lower/OpenMP/simd-linear.f90       |  54 +++++++
 flang/test/Lower/OpenMP/wsloop-linear.f90     |  57 ++++++++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |   2 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  98 ++++++++-----
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 133 ++++++++++++++++++
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  14 +-
 8 files changed, 311 insertions(+), 68 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
 create mode 100644 flang/test/Lower/OpenMP/simd-linear.f90
 create mode 100644 flang/test/Lower/OpenMP/wsloop-linear.f90

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fcb20fdf187ff..3f58c96900948 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1731,8 +1731,7 @@ static void genSimdClauses(
   cp.processReduction(loc, clauseOps, reductionSyms);
   cp.processSafelen(clauseOps);
   cp.processSimdlen(clauseOps);
-
-  cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd);
+  cp.processLinear(clauseOps);
 }
 
 static void genSingleClauses(lower::AbstractConverter &converter,
@@ -1922,9 +1921,9 @@ static void genWsloopClauses(
   cp.processOrdered(clauseOps);
   cp.processReduction(loc, clauseOps, reductionSyms);
   cp.processSchedule(stmtCtx, clauseOps);
+  cp.processLinear(clauseOps);
 
-  cp.processTODO<clause::Allocate, clause::Linear>(
-      loc, llvm::omp::Directive::OMPD_do);
+  cp.processTODO<clause::Allocate>(loc, llvm::omp::Directive::OMPD_do);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
deleted file mode 100644
index 4caf12a0169c4..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
+++ /dev/null
@@ -1,14 +0,0 @@
-! This test checks lowering of OpenMP do simd linear() pragma
-
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-subroutine testDoSimdLinear(int_array)
-        integer :: int_array(*)
-!CHECK: not yet implemented: Unhandled clause LINEAR in SIMD construct
-!$omp do simd linear(int_array)
-        do index_ = 1, 10
-        end do
-!$omp end do simd
-
-end subroutine testDoSimdLinear
-
diff --git a/flang/test/Lower/OpenMP/simd-linear.f90 b/flang/test/Lower/OpenMP/simd-linear.f90
new file mode 100644
index 0000000000000..ccb5fbb8b0a24
--- /dev/null
+++ b/flang/test/Lower/OpenMP/simd-linear.f90
@@ -0,0 +1,54 @@
+! This test checks lowering of OpenMP DO Directive (Worksharing)
+! with linear clause
+
+! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
+
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[const:.*]] = arith.constant 1 : i32
+subroutine simple_linear
+    implicit none
+    integer :: x, y, i
+    !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+    !$omp simd linear(x)
+    !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
+    !CHECK: %[[const:.*]] = arith.constant 2 : i32
+    !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
+    do i = 1, 10
+        y = x + 2
+    end do
+end subroutine
+
+
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+subroutine linear_step
+    implicit none
+    integer :: x, y, i
+    !CHECK: %[[const:.*]] = arith.constant 4 : i32
+    !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+    !$omp simd linear(x:4)
+    !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
+    !CHECK: %[[const:.*]] = arith.constant 2 : i32
+    !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32   
+    do i = 1, 10
+        y = x + 2
+    end do
+end subroutine
+
+!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
+!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+subroutine linear_expr
+    implicit none
+    integer :: x, y, i, a
+    !CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+    !CHECK: %[[const:.*]] = arith.constant 4 : i32
+    !CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
+    !CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
+    !$omp simd linear(x:a+4)
+    do i = 1, 10
+        y = x + 2
+    end do
+end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-linear.f90 b/flang/test/Lower/OpenMP/wsloop-linear.f90
new file mode 100644
index 0000000000000..b99677108be2f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-linear.f90
@@ -0,0 +1,57 @@
+! This test checks lowering of OpenMP DO Directive (Worksharing)
+! with linear clause
+
+! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
+
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[const:.*]] = arith.constant 1 : i32
+subroutine simple_linear
+    implicit none
+    integer :: x, y, i
+    !CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+    !$omp do linear(x)
+    !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
+    !CHECK: %[[const:.*]] = arith.constant 2 : i32
+    !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
+    do i = 1, 10
+        y = x + 2
+    end do
+    !$omp end do
+end subroutine
+
+
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+subroutine linear_step
+    implicit none
+    integer :: x, y, i
+    !CHECK: %[[const:.*]] = arith.constant 4 : i32
+    !CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+    !$omp do linear(x:4)
+    !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
+    !CHECK: %[[const:.*]] = arith.constant 2 : i32
+    !CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32   
+    do i = 1, 10
+        y = x + 2
+    end do
+    !$omp end do
+end subroutine
+
+!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
+!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
+!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+subroutine linear_expr
+    implicit none
+    integer :: x, y, i, a
+    !CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+    !CHECK: %[[const:.*]] = arith.constant 4 : i32
+    !CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
+    !CHECK: omp.wsloop linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
+    !$omp do linear(x:a+4)
+    do i = 1, 10
+        y = x + 2
+    end do
+    !$omp end do
+end subroutine
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 769aee64e1695..dd5d8ac5ff31d 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2610,7 +2610,7 @@ void SimdOp::build(OpBuilder &builder, OperationState &state,
   // TODO Store clauses in op: linearVars, linearStepVars
   SimdOp::build(builder, state, clauses.alignedVars,
                 makeArrayAttr(ctx, clauses.alignments), clauses.ifExpr,
-                /*linear_vars=*/{}, /*linear_step_vars=*/{},
+                clauses.linearVars, clauses.linearStepVars,
                 clauses.nontemporalVars, clauses.order, clauses.orderMod,
                 clauses.privateVars, makeArrayAttr(ctx, clauses.privateSyms),
                 clauses.privateNeedsBarrier, clauses.reductionMod,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 3185f28fe6681..d457f35a8dd2c 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -147,9 +147,9 @@ class LinearClauseProcessor {
 
 public:
   // Allocate space for linear variabes
-  void createLinearVar(llvm::IRBuilderBase &builder,
-                       LLVM::ModuleTranslation &moduleTranslation,
-                       mlir::Value &linearVar) {
+  LogicalResult createLinearVar(llvm::IRBuilderBase &builder,
+                                LLVM::ModuleTranslation &moduleTranslation,
+                                mlir::Value &linearVar, Operation &op) {
     if (llvm::AllocaInst *linearVarAlloca = dyn_cast<llvm::AllocaInst>(
             moduleTranslation.lookupValue(linearVar))) {
       linearPreconditionVars.push_back(builder.CreateAlloca(
@@ -159,7 +159,12 @@ class LinearClauseProcessor {
       linearOrigVal.push_back(moduleTranslation.lookupValue(linearVar));
       linearLoopBodyTemps.push_back(linearLoopBodyTemp);
       linearOrigVars.push_back(linearVarAlloca);
+      return success();
     }
+
+    else
+      return op.emitError() << "not yet implemented: linear clause support"
+                            << " for non alloca linear variables";
   }
 
   // Initialize linear step
@@ -169,20 +174,15 @@ class LinearClauseProcessor {
   }
 
   // Emit IR for initialization of linear variables
-  llvm::OpenMPIRBuilder::InsertPointOrErrorTy
-  initLinearVar(llvm::IRBuilderBase &builder,
-                LLVM::ModuleTranslation &moduleTranslation,
-                llvm::BasicBlock *loopPreHeader) {
+  void initLinearVar(llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation,
+                     llvm::BasicBlock *loopPreHeader) {
     builder.SetInsertPoint(loopPreHeader->getTerminator());
     for (size_t index = 0; index < linearOrigVars.size(); index++) {
       llvm::LoadInst *linearVarLoad = builder.CreateLoad(
           linearOrigVars[index]->getAllocatedType(), linearOrigVars[index]);
       builder.CreateStore(linearVarLoad, linearPreconditionVars[index]);
     }
-    llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
-        moduleTranslation.getOpenMPBuilder()->createBarrier(
-            builder.saveIP(), llvm::omp::OMPD_barrier);
-    return afterBarrierIP;
   }
 
   // Emit IR for updating Linear variables
@@ -193,18 +193,27 @@ class LinearClauseProcessor {
       // Emit increments for linear vars
       llvm::LoadInst *linearVarStart =
           builder.CreateLoad(linearOrigVars[index]->getAllocatedType(),
-
                              linearPreconditionVars[index]);
+
       auto mulInst = builder.CreateMul(loopInductionVar, linearSteps[index]);
-      auto addInst = builder.CreateAdd(linearVarStart, mulInst);
-      builder.CreateStore(addInst, linearLoopBodyTemps[index]);
+      if (linearOrigVars[index]->getAllocatedType()->isIntegerTy()) {
+        auto addInst = builder.CreateAdd(linearVarStart, mulInst);
+        builder.CreateStore(addInst, linearLoopBodyTemps[index]);
+      } else if (linearOrigVars[index]
+                     ->getAllocatedType()
+                     ->isFloatingPointTy()) {
+        auto cvt = builder.CreateSIToFP(
+            mulInst, linearOrigVars[index]->getAllocatedType());
+        auto addInst = builder.CreateFAdd(linearVarStart, cvt);
+        builder.CreateStore(addInst, linearLoopBodyTemps[index]);
+      }
     }
   }
 
   // Linear variable finalization is conditional on the last logical iteration.
   // Create BB splits to manage the same.
-  void outlineLinearFinalizationBB(llvm::IRBuilderBase &builder,
-                                   llvm::BasicBlock *loopExit) {
+  void splitLinearFiniBB(llvm::IRBuilderBase &builder,
+                         llvm::BasicBlock *loopExit) {
     linearFinalizationBB = loopExit->splitBasicBlock(
         loopExit->getTerminator(), "omp_loop.linear_finalization");
     linearExitBB = linearFinalizationBB->splitBasicBlock(
@@ -339,10 +348,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (!op.getIsDevicePtrVars().empty())
       result = todo("is_device_ptr");
   };
-  auto checkLinear = [&todo](auto op, LogicalResult &result) {
-    if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
-      result = todo("linear");
-  };
   auto checkNowait = [&todo](auto op, LogicalResult &result) {
     if (op.getNowait())
       result = todo("nowait");
@@ -432,7 +437,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
       })
       .Case([&](omp::WsloopOp op) {
         checkAllocate(op, result);
-        checkLinear(op, result);
         checkOrder(op, result);
         checkReduction(op, result);
       })
@@ -440,10 +444,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkAllocate(op, result);
         checkReduction(op, result);
       })
-      .Case([&](omp::SimdOp op) {
-        checkLinear(op, result);
-        checkReduction(op, result);
-      })
+      .Case([&](omp::SimdOp op) { checkReduction(op, result); })
       .Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
             omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
       .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp, omp::TargetUpdateOp>(
@@ -2563,13 +2564,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
 
   // Initialize linear variables and linear step
   LinearClauseProcessor linearClauseProcessor;
-  if (wsloopOp.getLinearVars().size()) {
-    for (mlir::Value linearVar : wsloopOp.getLinearVars())
-      linearClauseProcessor.createLinearVar(builder, moduleTranslation,
-                                            linearVar);
-    for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
-      linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
+  for (mlir::Value linearVar : wsloopOp.getLinearVars()) {
+    if (failed(linearClauseProcessor.createLinearVar(builder, moduleTranslation,
+                                                     linearVar, opInst)))
+      return failure();
   }
+  for (mlir::Value linearStep : wsloopOp.getLinearStepVars())
+    linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
 
   llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions(
       wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation);
@@ -2581,16 +2582,17 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
 
   // Emit Initialization and Update IR for linear variables
   if (wsloopOp.getLinearVars().size()) {
+    linearClauseProcessor.initLinearVar(builder, moduleTranslation,
+                                        loopInfo->getPreheader());
     llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterBarrierIP =
-        linearClauseProcessor.initLinearVar(builder, moduleTranslation,
-                                            loopInfo->getPreheader());
+        moduleTranslation.getOpenMPBuilder()->createBarrier(
+            builder.saveIP(), llvm::omp::OMPD_barrier);
     if (failed(handleError(afterBarrierIP, *loopOp)))
       return failure();
     builder.restoreIP(*afterBarrierIP);
     linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
                                           loopInfo->getIndVar());
-    linearClauseProcessor.outlineLinearFinalizationBB(builder,
-                                                      loopInfo->getExit());
+    linearClauseProcessor.splitLinearFiniBB(builder, loopInfo->getExit());
   }
 
   builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
@@ -2847,6 +2849,17 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
 
+  // Create linear variables and initialize linear step
+  LinearClauseProcessor linearClauseProcessor;
+
+  for (mlir::Value linearVar : simdOp.getLinearVars()) {
+    if (failed(linearClauseProcessor.createLinearVar(builder, moduleTranslation,
+                                                     linearVar, opInst)))
+      return failure();
+  }
+  for (mlir::Value linearStep : simdOp.getLinearStepVars())
+    linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
+
   llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
       builder, moduleTranslation, privateVarsInfo, allocaIP);
   if (handleError(afterAllocas, opInst).failed())
@@ -2910,14 +2923,27 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
   if (failed(handleError(regionBlock, opInst)))
     return failure();
 
-  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
   llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
+
+  // Emit Initialization for linear variables
+  if (simdOp.getLinearVars().size()) {
+    linearClauseProcessor.initLinearVar(builder, moduleTranslation,
+                                        loopInfo->getPreheader());
+    linearClauseProcessor.updateLinearVar(builder, loopInfo->getBody(),
+                                          loopInfo->getIndVar());
+  }
+  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
   ompBuilder->applySimd(loopInfo, alignedVars,
                         simdOp.getIfExpr()
                             ? moduleTranslation.lookupValue(simdOp.getIfExpr())
                             : nullptr,
                         order, simdlen, safelen);
 
+  for (size_t index = 0; index < simdOp.getLinearVars().size(); index++)
+    linearClauseProcessor.rewriteInPlace(builder, "omp.loop_nest.region",
+                                         index);
+
   // We now need to reduce the per-simd-lane reduction variable into the
   // original variable. This works a bit differently to other reductions (e.g.
   // wsloop) because we don't need to call into the OpenMP runtime to handle
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 3f4dcd5e24c56..55976d40ca6eb 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -358,6 +358,97 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
 
 // -----
 
+// CHECK-LABEL: wsloop_linear
+
+// CHECK: {{.*}} = alloca i32, i64 1, align 4
+// CHECK: %[[Y:.*]] = alloca i32, i64 1, align 4
+// CHECK: %[[X:.*]] = alloca i32, i64 1, align 4
+
+// CHECK: entry:
+// CHECK: %[[LINEAR_VAR:.*]] = alloca i32, align 4
+// CHECK: %[[LINEAR_RESULT:.*]] = alloca i32, align 4
+// CHECK: br label %omp_loop.preheader
+
+
+// CHECK: omp_loop.preheader:
+// CHECK: %[[LOAD:.*]] = load i32, ptr %[[X]], align 4
+// CHECK: store i32 %[[LOAD]], ptr %[[LINEAR_VAR]], align 4
+// CHECK: %omp_global_thread_num = call i32 @__kmpc_global_thread_num(ptr @2)
+// CHECK: call void @__kmpc_barrier(ptr @1, i32 %omp_global_thread_num)
+
+// CHECK: omp_loop.body:
+// CHECK: %[[LOOP_IV:.*]] = add i32 %omp_loop.iv, {{.*}}
+// CHECK: %[[LINEAR_LOAD:.*]] = load i32, ptr %[[LINEAR_VAR]], align 4
+// CHECK: %[[MUL:.*]] = mul i32 %[[LOOP_IV]], 1
+// CHECK: %[[ADD:.*]] = add i32 %[[LINEAR_LOAD]], %[[MUL]]
+// CHECK: store i32 %[[ADD]], ptr %[[LINEAR_RESULT]], align 4
+// CHECK: br label %omp.loop_nest.region
+
+// CHECK: omp.loop_nest.region:
+// CHECK: %[[LINEAR_LOAD:.*]] = load i32, ptr %[[LINEAR_RESULT]], align 4
+// CHECK: %[[ADD:.*]] = add i32 %[[LINEAR_LOAD]], 2
+// CHECK: store i32 %[[ADD]], ptr %[[Y]], align 4
+
+// CHECK: omp_loop.exit:
+// CHECK: call void @__kmpc_for_static_fini(ptr @2, i32 %omp_global_thread_num4)
+// CHECK: %omp_global_thread_num5 = call i32 @__kmpc_global_thread_num(ptr @2)
+// CHECK: call void @__kmpc_barrier(ptr @3, i32 %omp_global_thread_num5)
+// CHECK: br label %omp_loop.linear_finalization
+
+
+// CHECK: omp_loop.linear_finalization:
+// CHECK: %[[LAST_ITER:.*]] = load i32, ptr %p.lastiter, align 4
+// CHECK: %[[CMP:.*]] = icmp ne i32 %[[LAST_ITER]], 0
+// CHECK: br i1 %[[CMP]], label %omp_loop.linear_lastiter_exit, label %omp_loop.linear_exit
+
+// CHECK: omp_loop.linear_lastiter_exit:
+// CHECK: %[[LINEAR_RESULT_LOAD:.*]] = load i32, ptr %[[LINEAR_RESULT]], align 4
+// CHECK: store i32 %[[LINEAR_RESULT_LOAD]], ptr %[[X]], align 4
+// CHECK: br label %omp_loop.linear_exit
+
+
+// CHECK: omp_loop.linear_exit:
+// CHECK: %omp_global_thread_num6 = call i32 @__kmpc_global_thread_num(ptr @2)
+// CHECK: call void @__kmpc_barrier(ptr @1, i32 %omp_global_thread_num6)
+// CHECK: br label %omp_loop.after
+
+llvm.func @wsloop_linear() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x i32 {bindc_name = "y"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1 : i64) : i64
+  %5 = llvm.alloca %4 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(1 : i64) : i64
+  %7 = llvm.alloca %6 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %8 = llvm.mlir.constant(2 : i32) : i32
+  %9 = llvm.mlir.constant(10 : i32) : i32
+  %10 = llvm.mlir.constant(1 : i32) : i32
+  %11 = llvm.mlir.constant(1 : i64) : i64
+  %12 = llvm.mlir.constant(1 : i64) : i64
+  %13 = llvm.mlir.constant(1 : i64) : i64
+  %14 = llvm.mlir.constant(1 : i64) : i64
+  omp.wsloop linear(%5 = %10 : !llvm.ptr) {
+    omp.loop_nest (%arg0) : i32 = (%10) to (%9) inclusive step (%10) {
+      llvm.store %arg0, %1 : i32, !llvm.ptr
+      %15 = llvm.load %5 : !llvm.ptr -> i32
+      %16 = llvm.add %15, %8 : i32
+      llvm.store %16, %3 : i32, !llvm.ptr
+      %17 = llvm.add %arg0, %10 : i32
+      %18 = llvm.icmp "sgt" %17, %9 : i32
+      llvm.cond_br %18, ^bb1, ^bb2
+    ^bb1:  // pred: ^bb0
+      llvm.store %17, %1 : i32, !llvm.ptr
+      llvm.br ^bb2
+    ^bb2:  // 2 preds: ^bb0, ^bb1
+      omp.yield
+     }
+   }
+  llvm.return
+}
+
+// -----
+
 // CHECK-LABEL: @wsloop_inclusive_1
 llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) {
   %0 = llvm.mlir.constant(42 : index) : i64
@@ -695,6 +786,48 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) {
 
 // -----
 
+// CHECK-LABEL: @simd_linear
+
+// CHECK: entry:
+// CHECK: %[[LINEAR_VAR:.*]] = alloca i32, align 4
+// CHECK: %[[LINEAR_RESULT:.*]] = alloca i32, align 4
+// CHECK: br label %omp.simd.region
+
+// CHECK: omp.simd.region:
+// CHECK: br label %omp_loop.preheader
+
+// CHECK: omp_loop.preheader:
+// CHECK: %[[ORIG_VAL:.*]] = load i32, ptr {{.*}}, align 4
+// CHECK: store i32 %[[ORIG_VAL]], ptr %[[LINEAR_VAR]], align 4
+// CHECK: br label %omp_loop.header
+
+// CHECK: omp_loop.body:
+// CHECK: %[[LINEAR_LOAD:.*]] = load i32, ptr %[[LINEAR_VAR]], align 4
+// CHECK: %[[MUL:.*]] = mul i32 %omp_loop.iv, 2
+// CHECK: %[[ADD:.*]] = add i32 %[[LINEAR_LOAD]], %[[MUL]]
+// CHECK: store i32 %[[ADD]], ptr %[[LINEAR_RESULT]], align 4
+// CHECK: br label %omp.loop_nest.region
+
+llvm.func @simd_linear() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(10 : i32) : i32
+  %5 = llvm.mlir.constant(1 : i32) : i32
+  %6 = llvm.mlir.constant(2 : i32) : i32
+  %7 = llvm.mlir.constant(1 : i64) : i64
+  %8 = llvm.mlir.constant(1 : i64) : i64
+  omp.simd linear(%1 = %6 : !llvm.ptr) {
+    omp.loop_nest (%arg1) : i32 = (%5) to (%4) inclusive step (%5) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
 // CHECK-LABEL: @simd_simple_multiple
 llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
   omp.simd {
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 2fa4470bb8300..dbedf01b42ec8 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -117,7 +117,7 @@ llvm.func @sections_private(%x : !llvm.ptr) {
 // -----
 
 llvm.func @simd_linear(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
-  // expected-error at below {{not yet implemented: Unhandled clause linear in omp.simd operation}}
+  // expected-error at below {{not yet implemented: linear clause support for non alloca linear variables}}
   // expected-error at below {{LLVM Translation failed for operation: omp.simd}}
   omp.simd linear(%x = %step : !llvm.ptr) {
     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
@@ -467,18 +467,6 @@ llvm.func @wsloop_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
 
 // -----
 
-llvm.func @wsloop_linear(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
-  // expected-error at below {{not yet implemented: Unhandled clause linear in omp.wsloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.wsloop}}
-  omp.wsloop linear(%x = %step : !llvm.ptr) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
 llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
   // expected-error at below {{not yet implemented: Unhandled clause order in omp.wsloop operation}}
   // expected-error at below {{LLVM Translation failed for operation: omp.wsloop}}

>From 19d4ef1db80049aa049b61d4c5f583bad25df8de Mon Sep 17 00:00:00 2001
From: Nimish Mishra <neelam.nimish at gmail.com>
Date: Thu, 24 Jul 2025 13:31:43 +0530
Subject: [PATCH 2/2] [flang][mlir] Add support for implicit linearization in
 simd

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 10 +++-
 flang/lib/Semantics/resolve-directives.cpp    |  5 +-
 .../Lower/OpenMP/parallel-private-clause.f90  |  2 +-
 flang/test/Lower/OpenMP/simd-linear.f90       | 46 +++++++++++---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 60 +++++++++++++++----
 5 files changed, 100 insertions(+), 23 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 3f58c96900948..44dbbd52fa209 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2760,9 +2760,17 @@ genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
   simdArgs.priv.vars = simdClauseOps.privateVars;
   simdArgs.reduction.syms = simdReductionSyms;
   simdArgs.reduction.vars = simdClauseOps.reductionVars;
+
+  for (auto &sym : simdArgs.priv.syms) {
+    if (sym->test(Fortran::semantics::Symbol::Flag::OmpLinear)) {
+      const mlir::Value variable = converter.getSymbolAddress(*sym);
+      simdClauseOps.linearVars.push_back(variable);
+      simdClauseOps.linearStepVars.push_back(loopNestClauseOps.loopSteps[0]);
+    }
+  }
+
   auto simdOp =
       genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
-
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                 loopNestClauseOps, iv, {{simdOp, simdArgs}},
                 llvm::omp::Directive::OMPD_simd, dsp);
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 521c7432d9fbb..29cf41668fb03 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1979,7 +1979,7 @@ std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses(
 //     parallel do, taskloop, or distribute construct is (are) private.
 //   - The loop iteration variable in the associated do-loop of a simd construct
 //     with just one associated do-loop is linear with a linear-step that is the
-//     increment of the associated do-loop.
+//     increment of the associated do-loop (only for OpenMP versions <= 4.5)
 //   - The loop iteration variables in the associated do-loops of a simd
 //     construct with multiple associated do-loops are lastprivate.
 void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
@@ -1993,7 +1993,8 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
   if (!llvm::omp::allSimdSet.test(GetContext().directive)) {
     ivDSA = Symbol::Flag::OmpPrivate;
   } else if (level == 1) {
-    ivDSA = Symbol::Flag::OmpLinear;
+    if (version <= 45)
+      ivDSA = Symbol::Flag::OmpLinear;
   } else {
     ivDSA = Symbol::Flag::OmpLastPrivate;
   }
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90
index 492fb3bb9740d..c3f2667f77d3e 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90
@@ -350,7 +350,7 @@ subroutine simd_loop_1
   ! FIRDialect:     %[[UB:.*]] = arith.constant 9 : i32
   ! FIRDialect:     %[[STEP:.*]] = arith.constant 1 : i32
 
-  ! FIRDialect: omp.simd private({{.*}}) {
+  ! FIRDialect: omp.simd linear({{.*}} = %[[STEP]] : !fir.ref<i32>) private({{.*}}) {
   ! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   !$OMP SIMD PRIVATE(r)
   do i=1, 9
diff --git a/flang/test/Lower/OpenMP/simd-linear.f90 b/flang/test/Lower/OpenMP/simd-linear.f90
index ccb5fbb8b0a24..e026e28c42517 100644
--- a/flang/test/Lower/OpenMP/simd-linear.f90
+++ b/flang/test/Lower/OpenMP/simd-linear.f90
@@ -1,15 +1,24 @@
-! This test checks lowering of OpenMP DO Directive (Worksharing)
-! with linear clause
+! This test checks lowering of OpenMP SIMD with linear clause
 
-! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -emit-hlfir -fopenmp-version=45 %s -o - 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -emit-hlfir -fopenmp-version=50 %s -o - 2>&1 | FileCheck %s --check-prefix=NOIMPLICIT
 
+!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_linearEi"}
+!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFsimple_linearEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
 !CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[const:.*]] = arith.constant 1 : i32
+!CHECK: %{{.*}} = arith.constant 1 : i32
+!CHECK: %[[IV_step:.*]] = arith.constant 1 : i32
+
+!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
+!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!NOIMPLICIT: %[[const:.*]] = arith.constant 1 : i32
 subroutine simple_linear
     implicit none
     integer :: x, y, i
-    !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+    !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}
+    !NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
     !$omp simd linear(x)
     !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
     !CHECK: %[[const:.*]] = arith.constant 2 : i32
@@ -19,14 +28,23 @@ subroutine simple_linear
     end do
 end subroutine
 
-
+!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlinear_stepEi"}
+!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFlinear_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
 !CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
+!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!NOIMPLICIT: %[[const:.*]] = arith.constant 4 : i32
 subroutine linear_step
     implicit none
     integer :: x, y, i
     !CHECK: %[[const:.*]] = arith.constant 4 : i32
-    !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
+    !CHECK: %{{.*}} = arith.constant 1 : i32
+    !CHECK: %[[IV_step:.*]] = arith.constant 1 : i32
+    !CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}
+
+    !NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
     !$omp simd linear(x:4)
     !CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
     !CHECK: %[[const:.*]] = arith.constant 2 : i32
@@ -38,15 +56,29 @@ subroutine linear_step
 
 !CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
 !CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlinear_exprEi"}
+!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFlinear_exprEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
 !CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+!NOIMPLICIT: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
+!NOIMPLICIT: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
+!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+!NOIMPLICIT: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
+!NOIMPLICIT: %[[const:.*]] = arith.constant 4 : i32
+!NOIMPLICIT: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
 subroutine linear_expr
     implicit none
     integer :: x, y, i, a
     !CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
     !CHECK: %[[const:.*]] = arith.constant 4 : i32
     !CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
-    !CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
+    !CHECK: %{{.*}} = arith.constant 1 : i32
+    !CHECK: %[[IV_step:.*]] = arith.constant 1 : i32
+    !CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}
+
+    !NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
     !$omp simd linear(x:a+4)
     do i = 1, 10
         y = x + 2
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d457f35a8dd2c..159a22725b1c6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -146,6 +146,27 @@ class LinearClauseProcessor {
   llvm::BasicBlock *linearLastIterExitBB;
 
 public:
+  // Allocate space for linear variabes
+  LogicalResult createLinearVar(llvm::IRBuilderBase &builder,
+                                LLVM::ModuleTranslation &moduleTranslation,
+                                llvm::Value *linearVar, Operation &op) {
+    if (llvm::AllocaInst *linearVarAlloca =
+            dyn_cast<llvm::AllocaInst>(linearVar)) {
+      linearPreconditionVars.push_back(builder.CreateAlloca(
+          linearVarAlloca->getAllocatedType(), nullptr, ".linear_var"));
+      llvm::Value *linearLoopBodyTemp = builder.CreateAlloca(
+          linearVarAlloca->getAllocatedType(), nullptr, ".linear_result");
+      linearOrigVal.push_back(linearVar);
+      linearLoopBodyTemps.push_back(linearLoopBodyTemp);
+      linearOrigVars.push_back(linearVarAlloca);
+      return success();
+    }
+
+    else
+      return op.emitError() << "not yet implemented: linear clause support"
+                            << " for non alloca linear variables";
+  }
+
   // Allocate space for linear variabes
   LogicalResult createLinearVar(llvm::IRBuilderBase &builder,
                                 LLVM::ModuleTranslation &moduleTranslation,
@@ -265,7 +286,8 @@ class LinearClauseProcessor {
       users.push_back(user);
     for (auto *user : users) {
       if (auto *userInst = dyn_cast<llvm::Instruction>(user)) {
-        if (userInst->getParent()->getName().str() == BBName)
+        if (userInst->getParent()->getName().str().find(BBName) !=
+            std::string::npos)
           user->replaceUsesOfWith(linearOrigVal[varIndex],
                                   linearLoopBodyTemps[varIndex]);
       }
@@ -2849,17 +2871,6 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
 
-  // Create linear variables and initialize linear step
-  LinearClauseProcessor linearClauseProcessor;
-
-  for (mlir::Value linearVar : simdOp.getLinearVars()) {
-    if (failed(linearClauseProcessor.createLinearVar(builder, moduleTranslation,
-                                                     linearVar, opInst)))
-      return failure();
-  }
-  for (mlir::Value linearStep : simdOp.getLinearStepVars())
-    linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
-
   llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
       builder, moduleTranslation, privateVarsInfo, allocaIP);
   if (handleError(afterAllocas, opInst).failed())
@@ -2876,6 +2887,31 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
           .failed())
     return failure();
 
+  LinearClauseProcessor linearClauseProcessor;
+
+  // Create linear variables and initialize linear step
+  for (mlir::Value linearVar : simdOp.getLinearVars()) {
+    bool isImplicit = false;
+    for (auto [mlirPrivVar, llvmPrivateVar] :
+         llvm::zip_equal(privateVarsInfo.mlirVars, privateVarsInfo.llvmVars)) {
+      if (linearVar == mlirPrivVar) {
+        isImplicit = true;
+        if (failed(linearClauseProcessor.createLinearVar(
+                builder, moduleTranslation, llvmPrivateVar, opInst)))
+          return failure();
+        break;
+      }
+    }
+    if (!isImplicit) {
+      if (failed(linearClauseProcessor.createLinearVar(
+              builder, moduleTranslation, linearVar, opInst)))
+        return failure();
+    }
+  }
+
+  for (mlir::Value linearStep : simdOp.getLinearStepVars())
+    linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);
+
   // No call to copyFirstPrivateVars because FIRSTPRIVATE is not allowed for
   // SIMD.