[llvm] [mlir] scan lowering changes (PR #133149)

Anchu Rajendran S via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 14 10:33:45 PDT 2025


https://github.com/anchuraj updated https://github.com/llvm/llvm-project/pull/133149

>From 0987b648f075a1fccb28dad6536495078d74b506 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Fri, 4 Apr 2025 17:01:57 -0500
Subject: [PATCH 1/4] [IRBuilder] Lowering Scan Directive

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 122 +++++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 346 +++++++++++++++++-
 .../Frontend/OpenMPIRBuilderTest.cpp          |  62 ++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 250 ++++++++++---
 .../Target/LLVMIR/openmp-reduction-scan.mlir  | 120 ++++++
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  31 --
 6 files changed, 839 insertions(+), 92 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-reduction-scan.mlir

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 6b104708bdb0d..0e6bdb14e1b94 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -503,6 +503,19 @@ class OpenMPIRBuilder {
       return allocaInst;
     }
   };
+  struct ScanInformation {
+  public:
+    llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+    llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+    llvm::BasicBlock *OMPScanExitBlock = nullptr;
+    llvm::BasicBlock *OMPScanDispatch = nullptr;
+    llvm::BasicBlock *OMPScanLoopExit = nullptr;
+    bool OMPFirstScanLoop = false;
+    llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ReductionVarToScanBuffs;
+    llvm::Value *IV;
+    llvm::Value *Span;
+  } ScanInfo;
+
   /// Initialize the internal state, this will put structures types and
   /// potentially other helpers into the underlying module. Must be called
   /// before any other method and only once! This internal state includes types
@@ -729,6 +742,35 @@ class OpenMPIRBuilder {
                       LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
                       const Twine &Name = "loop");
 
+  /// Generator for the control flow structure of OpenMP canonical loops if
+  /// the parent directive has an `inscan` modifier specified.
+  /// If the `inscan` modifier is specified, the region of the parent is
+  /// expected to have a `scan` directive. Based on the clauses in
+  /// scan directive, the body of the loop is split into two loops: Input loop
+  /// and Scan Loop. Input loop contains the code generated for input phase of
+  /// scan and Scan loop contains the code generated for scan phase of scan.
+  ///
+  /// \param Loc       The insert and source location description.
+  /// \param BodyGenCB Callback that will generate the loop body code.
+  /// \param Start     Value of the loop counter for the first iteration.
+  /// \param Stop      Loop counter values past this will stop the loop.
+  /// \param Step      Loop counter increment after each iteration; negative
+  ///                  means counting down.
+  /// \param IsSigned  Whether Start, Stop and Step are signed integers.
+  /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+  ///                      counter.
+  /// \param ComputeIP Insertion point for instructions computing the trip
+  ///                  count. Can be used to ensure the trip count is available
+  ///                  at the outermost loop of a loop nest. If not set,
+  ///                  defaults to the preheader of the generated loop.
+  /// \param Name      Base name used to derive BB and instruction names.
+  ///
+  /// \returns A vector containing Loop Info of Input Loop and Scan Loop.
+  Expected<SmallVector<llvm::CanonicalLoopInfo *>> createCanonicalScanLoops(
+      const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+      Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+      InsertPointTy ComputeIP, const Twine &Name);
+
   /// Calculate the trip count of a canonical loop.
   ///
   /// This allows specifying user-defined loop counter values using increment,
@@ -798,13 +840,16 @@ class OpenMPIRBuilder {
   ///                  at the outermost loop of a loop nest. If not set,
   ///                  defaults to the preheader of the generated loop.
   /// \param Name      Base name used to derive BB and instruction names.
+  /// \param InScan    Whether loop has a scan reduction specified.
   ///
   /// \returns An object representing the created control flow structure which
   ///          can be used for loop-associated directives.
-  Expected<CanonicalLoopInfo *> createCanonicalLoop(
-      const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
-      Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
-      InsertPointTy ComputeIP = {}, const Twine &Name = "loop");
+  Expected<CanonicalLoopInfo *>
+  createCanonicalLoop(const LocationDescription &Loc,
+                      LoopBodyGenCallbackTy BodyGenCB, Value *Start,
+                      Value *Stop, Value *Step, bool IsSigned,
+                      bool InclusiveStop, InsertPointTy ComputeIP = {},
+                      const Twine &Name = "loop", bool InScan = false);
 
   /// Collapse a loop nest into a single loop.
   ///
@@ -1532,6 +1577,38 @@ class OpenMPIRBuilder {
       ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
       Function *ReduceFn, AttributeList FuncAttrs);
 
+  /// Creates the runtime call specified
+  /// \param Callee Function Declaration Value
+  /// \param Args Arguments passed to the call
+  /// \param Name Optional param to specify the name of the call Instruction.
+  ///
+  /// \return The Runtime call instruction created.
+  llvm::CallInst *emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
+                                          ArrayRef<llvm::Value *> Args,
+                                          const llvm::Twine &Name);
+
+  /// Helper function for createCanonicalScanLoops that creates the input loop
+  /// via \p InputLoopGen and then the scan loop via \p ScanLoopGen.
+  /// \param InputLoopGen Callback for generating the loop for input phase
+  /// \param ScanLoopGen Callback for generating the loop for scan phase
+  ///
+  /// \return error if any produced, else return success.
+  Error emitScanBasedDirectiveIR(
+      llvm::function_ref<Error()> InputLoopGen,
+      llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen);
+
+  /// Creates the basic blocks required for scan reduction.
+  void createScanBBs();
+
+  /// Creates the buffer needed for scan reduction.
+  /// \param ScanVars Scan Variables.
+  void emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars);
+
+  /// Copies the result back to the reduction variable.
+  /// \param ReductionInfos Array type containing the ReductionOps.
+  void emitScanBasedDirectiveFinalsIR(
+      SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+
   /// This function emits a helper that gathers Reduce lists from the first
   /// lane of every active warp to lanes in the first warp.
   ///
@@ -2179,7 +2256,6 @@ class OpenMPIRBuilder {
   // block, if possible, or else at the end of the function. Also add a branch
   // from current block to BB if current block does not have a terminator.
   void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
-
   /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
   /// Here is the logic:
   /// if (Cond) {
@@ -2607,6 +2683,42 @@ class OpenMPIRBuilder {
                                     BodyGenCallbackTy BodyGenCB,
                                     FinalizeCallbackTy FiniCB, Value *Filter);
 
+  /// This function performs the scan reduction of the values updated in
+  /// the input phase. The reduction logic needs to be emitted between input
+  /// and scan loop returned by `createCanonicalScanLoops`. The following
+  /// is the code that is generated, `buffer` and `span` are expected to be
+  /// populated before executing the generated code.
+  ///
+  ///  for (int k = 0; k != ceil(log2(span)); ++k) {
+  ///    i=pow(2,k)
+  ///    for (size cnt = last_iter; cnt >= i; --cnt)
+  ///      buffer[cnt] op= buffer[cnt-i];
+  ///  }
+  /// \param Loc The insert and source location description.
+  /// \param FinalizeIP The IP where the reduction result needs
+  ///                   to be copied back to the original variable.
+  /// \param ReductionInfos Array type containing the ReductionOps.
+  ///
+  /// \returns The insertion position *after* the scan reduction.
+  InsertPointOrErrorTy emitScanReduction(
+      const LocationDescription &Loc, InsertPointTy &FinalizeIP,
+      SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+
+  /// This directive splits and directs the control flow to input phase
+  /// blocks or scan phase blocks based on 1. whether input loop or scan loop
+  /// is executed, 2. whether exclusive or inclusive scan is used.
+  ///
+  /// \param Loc The insert and source location description.
+  /// \param AllocaIP The IP where the temporary buffer for scan reduction
+  ///                 needs to be allocated.
+  /// \param ScanVars Scan Variables.
+  /// \param IsInclusive Whether it is an inclusive or exclusive scan.
+  ///
+  /// \returns The insertion position *after* the scan.
+  InsertPointOrErrorTy createScan(const LocationDescription &Loc,
+                                  InsertPointTy AllocaIP,
+                                  ArrayRef<llvm::Value *> ScanVars,
+                                  bool IsInclusive);
   /// Generator for '#omp critical'
   ///
   /// \param Loc The insert and source location description.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 28662efc02882..2f10a52538580 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -59,6 +59,7 @@
 #include "llvm/Transforms/Utils/LoopPeel.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 
+#include <cassert>
 #include <cstdint>
 #include <optional>
 
@@ -3981,6 +3982,263 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
                               /*Conditional*/ true, /*hasFinalize*/ true);
 }
 
+llvm::CallInst *
+OpenMPIRBuilder::emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
+                                         ArrayRef<llvm::Value *> Args,
+                                         const llvm::Twine &Name) {
+  llvm::CallInst *Call = Builder.CreateCall(
+      Callee, Args, SmallVector<llvm::OperandBundleDef, 1>(), Name);
+  Call->setDoesNotThrow();
+  return Call;
+}
+
+// Expects input basic block is dominated by BeforeScanBB.
+// Once Scan directive is encountered, the code after scan directive should be
+// dominated by AfterScanBB. Scan directive splits the code sequence to
+// scan and input phase. Based on whether inclusive or exclusive
+// clause is used in the scan directive and whether input loop or scan loop
+// is lowered, it adds jumps to input and scan phase. First Scan loop is the
+// input loop and second is the scan loop. The code generated handles only
+// inclusive scans now.
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
+    const LocationDescription &Loc, InsertPointTy AllocaIP,
+    ArrayRef<llvm::Value *> ScanVars, bool IsInclusive) {
+  if (ScanInfo.OMPFirstScanLoop) {
+    Builder.restoreIP(AllocaIP);
+    emitScanBasedDirectiveDeclsIR(ScanVars);
+  }
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+  unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
+  llvm::Value *IV = ScanInfo.IV;
+
+  if (ScanInfo.OMPFirstScanLoop) {
+    // Emit buffer[i] = red; at the end of the input phase.
+    for (Value *ScanVar : ScanVars) {
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVar];
+      Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+      Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+      Value *Src = Builder.CreateLoad(DestTy, ScanVar);
+      Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          Val, DestTy->getPointerTo(defaultAS));
+
+      Builder.CreateStore(Src, Dest);
+    }
+  }
+  Builder.CreateBr(ScanInfo.OMPScanLoopExit);
+  emitBlock(ScanInfo.OMPScanDispatch, Builder.GetInsertBlock()->getParent());
+
+  // Initialize the private reduction variable to 0 in each iteration.
+  // It is used to copy initial values to the scan buffer.
+  ConstantInt *Zero = ConstantInt::get(Builder.getInt32Ty(), 0);
+  for (Value *ScanVar : ScanVars) {
+    Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+    Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+        ScanVar, DestTy->getPointerTo(defaultAS));
+    Builder.CreateStore(Zero, Dest);
+  }
+
+  if (!ScanInfo.OMPFirstScanLoop) {
+    IV = ScanInfo.IV;
+    // Emit red = buffer[i]; at the entrance to the scan phase.
+    // TODO: if exclusive scan, the red = buffer[i-1] needs to be updated.
+    for (Value *ScanVar : ScanVars) {
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVar];
+      Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+      Value *SrcPtr =
+          Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+      Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
+      Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          ScanVar, DestTy->getPointerTo(defaultAS));
+
+      Builder.CreateStore(Src, Dest);
+    }
+  }
+
+  // TODO: Update it to CreateBr and remove dead blocks
+  llvm::Value *CmpI = Builder.getInt1(true);
+  if (ScanInfo.OMPFirstScanLoop == IsInclusive) {
+    Builder.CreateCondBr(CmpI, ScanInfo.OMPBeforeScanBlock,
+                         ScanInfo.OMPAfterScanBlock);
+  } else {
+    Builder.CreateCondBr(CmpI, ScanInfo.OMPAfterScanBlock,
+                         ScanInfo.OMPBeforeScanBlock);
+  }
+  emitBlock(ScanInfo.OMPAfterScanBlock, Builder.GetInsertBlock()->getParent());
+  Builder.SetInsertPoint(ScanInfo.OMPAfterScanBlock);
+  return Builder.saveIP();
+}
+
+// Allocates, per scan variable, an i32 buffer of Span + 1 elements.
+void OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
+    ArrayRef<Value *> ScanVars) {
+  // Use Span's own integer type so the add is well-typed for any width.
+  Value *AllocSpan = Builder.CreateAdd(
+      ScanInfo.Span, ConstantInt::get(ScanInfo.Span->getType(), 1));
+  for (Value *ScanVar : ScanVars)
+    ScanInfo.ReductionVarToScanBuffs[ScanVar] =
+        Builder.CreateAlloca(Builder.getInt32Ty(), AllocSpan, "vla");
+}
+
+void OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
+    SmallVector<ReductionInfo> ReductionInfos) {
+  llvm::Value *OMPLast = Builder.CreateNSWAdd(
+      ScanInfo.Span,
+      llvm::ConstantInt::get(ScanInfo.Span->getType(), 1, /*isSigned=*/false));
+  unsigned int DefaultAS = M.getDataLayout().getProgramAddressSpace();
+  for (ReductionInfo RedInfo : ReductionInfos) {
+    Value *PrivateVar = RedInfo.PrivateVariable;
+    Value *OrigVar = RedInfo.Variable;
+    Value *Buff = ScanInfo.ReductionVarToScanBuffs[PrivateVar];
+
+    Type *SrcTy = RedInfo.ElementType;
+    Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, OMPLast, "arrayOffset");
+    Value *Src = Builder.CreateLoad(SrcTy, Val);
+    Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+        OrigVar, SrcTy->getPointerTo(DefaultAS));
+
+    Builder.CreateStore(Src, Dest);
+  }
+}
+
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
+    const LocationDescription &Loc, InsertPointTy &FinalizeIP,
+    SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
+
+  llvm::Value *spanDiff = ScanInfo.Span;
+
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+  auto curFn = Builder.GetInsertBlock()->getParent();
+  // for (int k = 0; k != ceil(log2(n)); ++k)
+  llvm::BasicBlock *LoopBB =
+      BasicBlock::Create(curFn->getContext(), "omp.outer.log.scan.body");
+  llvm::BasicBlock *ExitBB =
+      BasicBlock::Create(curFn->getContext(), "omp.outer.log.scan.exit");
+  llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration(
+      Builder.GetInsertBlock()->getModule(),
+      (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
+  llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
+  ConstantInt *One = ConstantInt::get(Builder.getInt32Ty(), 1);
+  llvm::Value *span = Builder.CreateAdd(spanDiff, One);
+  llvm::Value *Arg = Builder.CreateUIToFP(span, Builder.getDoubleTy());
+  llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
+  F = llvm::Intrinsic::getOrInsertDeclaration(
+      Builder.GetInsertBlock()->getModule(),
+      (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
+  LogVal = emitNoUnwindRuntimeCall(F, LogVal, "");
+  LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
+  llvm::Value *NMin1 =
+      Builder.CreateNUWSub(span, llvm::ConstantInt::get(span->getType(), 1));
+  Builder.SetInsertPoint(InputBB);
+  Builder.CreateBr(LoopBB);
+  emitBlock(LoopBB, Builder.GetInsertBlock()->getParent());
+  Builder.SetInsertPoint(LoopBB);
+
+  PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+  //// size pow2k = 1;
+  PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+  Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
+                       InputBB);
+  Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1), InputBB);
+  //// for (size i = n - 1; i >= 2 ^ k; --i)
+  ////   tmp[i] op= tmp[i-pow2k];
+  llvm::BasicBlock *InnerLoopBB =
+      BasicBlock::Create(curFn->getContext(), "omp.inner.log.scan.body");
+  llvm::BasicBlock *InnerExitBB =
+      BasicBlock::Create(curFn->getContext(), "omp.inner.log.scan.exit");
+  llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
+  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+  emitBlock(InnerLoopBB, Builder.GetInsertBlock()->getParent());
+  Builder.SetInsertPoint(InnerLoopBB);
+  auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+  IVal->addIncoming(NMin1, LoopBB);
+  unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
+  for (ReductionInfo RedInfo : ReductionInfos) {
+    Value *ReductionVal = RedInfo.PrivateVariable;
+    Value *Buff = ScanInfo.ReductionVarToScanBuffs[ReductionVal];
+    Type *DestTy = RedInfo.ElementType;
+    Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
+    Value *LHSPtr = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+    Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K);
+    Value *RHSPtr =
+        Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
+    Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
+    Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
+    Value *LHSAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+        LHSPtr, RHS->getType()->getPointerTo(defaultAS));
+    llvm::Value *Result;
+    InsertPointOrErrorTy AfterIP =
+        RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
+    if (!AfterIP)
+      return AfterIP.takeError();
+    Builder.CreateStore(Result, LHSAddr);
+  }
+  llvm::Value *NextIVal = Builder.CreateNUWSub(
+      IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
+  IVal->addIncoming(NextIVal, Builder.GetInsertBlock());
+  CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
+  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+  emitBlock(InnerExitBB, Builder.GetInsertBlock()->getParent());
+  llvm::Value *Next = Builder.CreateNUWAdd(
+      Counter, llvm::ConstantInt::get(Counter->getType(), 1));
+  Counter->addIncoming(Next, Builder.GetInsertBlock());
+  // pow2k <<= 1;
+  llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+  Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock());
+  llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal);
+  Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+  emitBlock(ExitBB, Builder.GetInsertBlock()->getParent());
+  Builder.SetInsertPoint(ExitBB);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+
+  Builder.restoreIP(FinalizeIP);
+  emitScanBasedDirectiveFinalsIR(ReductionInfos);
+  FinalizeIP = Builder.saveIP();
+
+  return AfterIP;
+}
+
+Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
+    llvm::function_ref<Error()> InputLoopGen,
+    llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen) {
+
+  {
+    // Emit loop with input phase:
+    // #pragma omp ...
+    // for (i: 0..<num_iters>) {
+    //   <input phase>;
+    //   buffer[i] = red;
+    // }
+    ScanInfo.OMPFirstScanLoop = true;
+    auto Result = InputLoopGen();
+    if (Result)
+      return Result;
+  }
+  {
+    ScanInfo.OMPFirstScanLoop = false;
+    auto Result = ScanLoopGen(Builder.saveIP());
+    if (Result)
+      return Result;
+  }
+  return Error::success();
+}
+
+void OpenMPIRBuilder::createScanBBs() {
+  auto fun = Builder.GetInsertBlock()->getParent();
+  ScanInfo.OMPScanExitBlock =
+      BasicBlock::Create(fun->getContext(), "omp.exit.inscan.bb");
+  ScanInfo.OMPScanDispatch =
+      BasicBlock::Create(fun->getContext(), "omp.inscan.dispatch");
+  ScanInfo.OMPAfterScanBlock =
+      BasicBlock::Create(fun->getContext(), "omp.after.scan.bb");
+  ScanInfo.OMPBeforeScanBlock =
+      BasicBlock::Create(fun->getContext(), "omp.before.scan.bb");
+  ScanInfo.OMPScanLoopExit =
+      BasicBlock::Create(fun->getContext(), "omp.scan.loop.exit");
+}
+
 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
     DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
     BasicBlock *PostInsertBefore, const Twine &Name) {
@@ -4078,10 +4336,91 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
   return CL;
 }
 
+Expected<SmallVector<llvm::CanonicalLoopInfo *>>
+OpenMPIRBuilder::createCanonicalScanLoops(
+    const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+    InsertPointTy ComputeIP, const Twine &Name) {
+  LocationDescription ComputeLoc =
+      ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
+  updateToLocation(ComputeLoc);
+
+  Value *TripCount = calculateCanonicalLoopTripCount(
+      ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
+  ScanInfo.Span = TripCount;
+
+  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
+    /// The control flow of a loop body with the following structure:
+    ///
+    ///     InputBlock
+    ///        |
+    ///     ContinueBlock
+    ///
+    ///  is transformed to:
+    ///
+    ///     InputBlock
+    ///        |
+    ///     OMPScanDispatch
+    ///
+    ///     OMPBeforeScanBlock
+    ///        |
+    ///     OMPScanLoopExit
+    ///        |
+    ///     ContinueBlock
+    ///
+    /// OMPBeforeScanBlock dominates the control flow of code generated until
+    /// scan directive is encountered and OMPAfterScanBlock dominates the
+    /// control flow of code generated after scan is encountered. The successor
+    /// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
+    /// on 1. whether it is in input phase or scan phase, 2. whether it is an
+    /// exclusive or inclusive scan.
+    ScanInfo.IV = IV;
+    createScanBBs();
+    BasicBlock *InputBlock = Builder.GetInsertBlock();
+    Instruction *Terminator = InputBlock->getTerminator();
+    assert(Terminator->getNumSuccessors() == 1);
+    BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
+    Terminator->setSuccessor(0, ScanInfo.OMPScanDispatch);
+    emitBlock(ScanInfo.OMPBeforeScanBlock,
+              Builder.GetInsertBlock()->getParent());
+    Builder.CreateBr(ScanInfo.OMPScanLoopExit);
+    emitBlock(ScanInfo.OMPScanLoopExit, Builder.GetInsertBlock()->getParent());
+    Builder.CreateBr(ContinueBlock);
+    Builder.SetInsertPoint(ScanInfo.OMPBeforeScanBlock->getFirstInsertionPt());
+    return BodyGenCB(Builder.saveIP(), IV);
+  };
+
+  SmallVector<llvm::CanonicalLoopInfo *> Result;
+  const auto &&InputLoopGen = [&]() -> Error {
+    auto LoopInfo =
+        createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
+                            InclusiveStop, ComputeIP, Name, true);
+    if (!LoopInfo)
+      return LoopInfo.takeError();
+    Result.push_back(*LoopInfo);
+    Builder.restoreIP((*LoopInfo)->getAfterIP());
+    return Error::success();
+  };
+  const auto &&ScanLoopGen = [&](LocationDescription Loc) -> Error {
+    auto LoopInfo =
+        createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
+                            InclusiveStop, ComputeIP, Name, true);
+    if (!LoopInfo)
+      return LoopInfo.takeError();
+    Result.push_back(*LoopInfo);
+    Builder.restoreIP((*LoopInfo)->getAfterIP());
+    return Error::success();
+  };
+  Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen);
+  if (Err) {
+    return Err;
+  }
+  return Result;
+}
+
 Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
     const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
     bool IsSigned, bool InclusiveStop, const Twine &Name) {
-
   // Consider the following difficulties (assuming 8-bit signed integers):
   //  * Adding \p Step to the loop counter which passes \p Stop may overflow:
   //      DO I = 1, 100, 50
@@ -4141,7 +4480,7 @@ Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
 Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
     const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
     Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
-    InsertPointTy ComputeIP, const Twine &Name) {
+    InsertPointTy ComputeIP, const Twine &Name, bool InScan) {
   LocationDescription ComputeLoc =
       ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
 
@@ -4152,6 +4491,9 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
     Builder.restoreIP(CodeGenIP);
     Value *Span = Builder.CreateMul(IV, Step);
     Value *IndVar = Builder.CreateAdd(Span, Start);
+    if (InScan) {
+      ScanInfo.IV = IndVar;
+    }
     return BodyGenCB(Builder.saveIP(), IndVar);
   };
   LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 2d3d318be7ff1..251042e030f0d 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1440,6 +1440,14 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
 
   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
 }
+void createScan(llvm::Value *scanVar, OpenMPIRBuilder &OMPBuilder,
+                IRBuilder<> &Builder, OpenMPIRBuilder::LocationDescription Loc,
+                OpenMPIRBuilder::InsertPointTy &allocaIP) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
+                       OMPBuilder.createScan(Loc, allocaIP, {scanVar}, true));
+  Builder.restoreIP(retIp);
+}
 
 TEST_F(OpenMPIRBuilderTest, CanonicalLoopTripCount) {
   OpenMPIRBuilder OMPBuilder(*M);
@@ -5336,6 +5344,60 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
   EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
 }
 
+TEST_F(OpenMPIRBuilderTest, ScanReduction) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+  Value *TripCount = F->getArg(0);
+  Type *LCTy = TripCount->getType();
+  Value *StartVal = ConstantInt::get(LCTy, 1);
+  Value *StopVal = ConstantInt::get(LCTy, 100);
+  Value *Step = ConstantInt::get(LCTy, 1);
+  auto allocaIP = Builder.saveIP();
+
+  llvm::Value *scanVar = Builder.CreateAlloca(Builder.getFloatTy());
+  llvm::Value *origVar = Builder.CreateAlloca(Builder.getFloatTy());
+  unsigned NumBodiesGenerated = 0;
+  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
+    NumBodiesGenerated += 1;
+    Builder.restoreIP(CodeGenIP);
+    createScan(scanVar, OMPBuilder, Builder, Loc, allocaIP);
+    return Error::success();
+  };
+  SmallVector<CanonicalLoopInfo *> Loops;
+  ASSERT_EXPECTED_INIT(SmallVector<CanonicalLoopInfo *>, loopsVec,
+                       OMPBuilder.createCanonicalScanLoops(
+                           Loc, LoopBodyGenCB, StartVal, StopVal, Step, false,
+                           false, Builder.saveIP(), "scan"));
+  Loops = loopsVec;
+  EXPECT_EQ(Loops.size(), 2U);
+  auto inputLoop = Loops.front();
+  auto scanLoop = Loops.back();
+  Builder.restoreIP(scanLoop->getAfterIP());
+  inputLoop->assertOK();
+  scanLoop->assertOK();
+
+  //// Verify control flow structure (in addition to Loop->assertOK()).
+  EXPECT_EQ(inputLoop->getPreheader()->getSinglePredecessor(),
+            &F->getEntryBlock());
+  EXPECT_EQ(scanLoop->getAfter(), Builder.GetInsertBlock());
+  EXPECT_EQ(NumBodiesGenerated, 2U);
+  SmallVector<OpenMPIRBuilder::ReductionInfo> reductionInfos = {
+      {Builder.getFloatTy(), origVar, scanVar,
+       /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
+       /*ReductionGenClang=*/nullptr, sumAtomicReduction}};
+  auto FinalizeIP = scanLoop->getAfterIP();
+  OpenMPIRBuilder::LocationDescription RedLoc({inputLoop->getAfterIP(), DL});
+  llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
+  ASSERT_EXPECTED_INIT(
+      InsertPointTy, retIp,
+      OMPBuilder.emitScanReduction(RedLoc, FinalizeIP, reductionInfos));
+  Builder.restoreIP(retIp);
+  Builder.CreateBr(Cont);
+}
+
 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8d1cc9b10a950..bfe6faa5f1bd4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -86,7 +86,9 @@ class OpenMPLoopInfoStackFrame
     : public LLVM::ModuleTranslation::StackFrameBase<OpenMPLoopInfoStackFrame> {
 public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)
-  llvm::CanonicalLoopInfo *loopInfo = nullptr;
+  // For constructs like scan, one Loop info frame can contain multiple
+  // Canonical Loops
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
 };
 
 /// Custom error class to signal translation errors that don't need reporting,
@@ -169,6 +171,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDistScheduleChunkSize())
       result = todo("dist_schedule with chunk_size");
   };
+  auto checkExclusive = [&todo](auto op, LogicalResult &result) {
+    if (!op.getExclusiveVars().empty())
+      result = todo("exclusive");
+  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -232,8 +238,8 @@ static LogicalResult checkImplementationStatus(Operation &op) {
           op.getReductionSyms())
         result = todo("reduction");
     if (op.getReductionMod() &&
-        op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
-      result = todo("reduction with modifier");
+        op.getReductionMod().value() == omp::ReductionModifier::task)
+      result = todo("reduction with task modifier");
   };
   auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
     if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
@@ -253,6 +259,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkOrder(op, result);
       })
       .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
+      .Case([&](omp::ScanOp op) { checkExclusive(op, result); })
       .Case([&](omp::SectionsOp op) {
         checkAllocate(op, result);
         checkPrivate(op, result);
@@ -382,15 +389,15 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
 /// Find the loop information structure for the loop nest being translated. It
 /// will return a `null` value unless called from the translation function for
 /// a loop wrapper operation after successfully translating its body.
-static llvm::CanonicalLoopInfo *
-findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation) {
-  llvm::CanonicalLoopInfo *loopInfo = nullptr;
+static SmallVector<llvm::CanonicalLoopInfo *>
+findCurrentLoopInfos(LLVM::ModuleTranslation &moduleTranslation) {
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
   moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
       [&](OpenMPLoopInfoStackFrame &frame) {
-        loopInfo = frame.loopInfo;
+        loopInfos = frame.loopInfos;
         return WalkResult::interrupt();
       });
-  return loopInfo;
+  return loopInfos;
 }
 
 /// Converts the given region that appears within an OpenMP dialect operation to
@@ -2342,27 +2349,62 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   if (failed(handleError(regionBlock, opInst)))
     return failure();
 
-  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
-  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
-
-  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
-      ompBuilder->applyWorkshareLoop(
-          ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
-          convertToScheduleKind(schedule), chunk, isSimd,
-          scheduleMod == omp::ScheduleModifier::monotonic,
-          scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
-          workshareLoopType);
-
-  if (failed(handleError(wsloopIP, opInst)))
-    return failure();
-
-  // Process the reductions if required.
-  if (failed(createReductionsAndCleanup(
-          wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
-          privateReductionVariables, isByRef, wsloopOp.getNowait(),
-          /*isTeamsReduction=*/false)))
-    return failure();
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos =
+      findCurrentLoopInfos(moduleTranslation);
+  auto inputLoopFinishIp = loopInfos.front()->getAfterIP();
+  auto scanLoopFinishIp = loopInfos.back()->getAfterIP();
+  bool isInScanRegion =
+      wsloopOp.getReductionMod() && (wsloopOp.getReductionMod().value() ==
+                                     mlir::omp::ReductionModifier::inscan);
+  if (isInScanRegion) {
+    builder.restoreIP(inputLoopFinishIp);
+    SmallVector<OwningReductionGen> owningReductionGens;
+    SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
+    SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
+    collectReductionInfo(wsloopOp, builder, moduleTranslation, reductionDecls,
+                         owningReductionGens, owningAtomicReductionGens,
+                         privateReductionVariables, reductionInfos);
+    llvm::BasicBlock *cont = splitBB(builder, false, "omp.scan.loop.cont");
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy redIP =
+        ompBuilder->emitScanReduction(builder.saveIP(), scanLoopFinishIp,
+                                      reductionInfos);
+    if (failed(handleError(redIP, opInst)))
+      return failure();
 
+    builder.restoreIP(*redIP);
+    builder.CreateBr(cont);
+  }
+  for (llvm::CanonicalLoopInfo *loopInfo : loopInfos) {
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+        ompBuilder->applyWorkshareLoop(
+            ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+            convertToScheduleKind(schedule), chunk, isSimd,
+            scheduleMod == omp::ScheduleModifier::monotonic,
+            scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+            workshareLoopType);
+
+    if (failed(handleError(wsloopIP, opInst)))
+      return failure();
+  }
+  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+  if (isInScanRegion) {
+    SmallVector<Region *> reductionRegions;
+    llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
+                    [](omp::DeclareReductionOp reductionDecl) {
+                      return &reductionDecl.getCleanupRegion();
+                    });
+    if (failed(inlineOmpRegionCleanup(
+            reductionRegions, privateReductionVariables, moduleTranslation,
+            builder, "omp.reduction.cleanup")))
+      return failure();
+  } else {
+    // Process the reductions if required.
+    if (failed(createReductionsAndCleanup(
+            wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
+            privateReductionVariables, isByRef, wsloopOp.getNowait(),
+            /*isTeamsReduction=*/false)))
+      return failure();
+  }
   return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
                             privateVarsInfo.llvmVars,
                             privateVarsInfo.privatizers);
@@ -2553,6 +2595,60 @@ convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
   llvm_unreachable("Unknown ClauseOrderKind kind");
 }
 
+static LogicalResult
+convertOmpScan(Operation &opInst, llvm::IRBuilderBase &builder,
+               LLVM::ModuleTranslation &moduleTranslation) {
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+  auto scanOp = cast<omp::ScanOp>(opInst);
+  bool isInclusive = scanOp.hasInclusiveVars();
+  SmallVector<llvm::Value *> llvmScanVars;
+  mlir::OperandRange mlirScanVars = scanOp.getInclusiveVars();
+  if (!isInclusive)
+    mlirScanVars = scanOp.getExclusiveVars();
+  for (auto val : mlirScanVars) {
+    llvm::Value *llvmVal = moduleTranslation.lookupValue(val);
+
+    llvmScanVars.push_back(llvmVal);
+  }
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createScan(
+          ompLoc, allocaIP, llvmScanVars, isInclusive);
+  if (failed(handleError(afterIP, opInst)))
+    return failure();
+
+  builder.restoreIP(*afterIP);
+
+  // TODO: The argument of LoopnestOp is stored into the index variable and this
+  // variable is used
+  //  across scan operation. However that makes the mlir
+  //  invalid.(`Intra-iteration dependences from a statement in the structured
+  //  block sequence that precede a scan directive to a statement in the
+  //  structured block sequence that follows a scan directive must not exist,
+  //  except for dependences for the list items specified in an inclusive or
+  //  exclusive clause.`). The argument of LoopNestOp need to be loaded again
+  //  after ScanOp again so mlir generated is valid.
+  auto parentOp = scanOp->getParentOp();
+  auto loopOp = cast<omp::LoopNestOp>(parentOp);
+  if (loopOp) {
+    auto &firstBlock = *(scanOp->getParentRegion()->getBlocks()).begin();
+    auto &ins = *(firstBlock.begin());
+    if (isa<LLVM::StoreOp>(ins)) {
+      LLVM::StoreOp storeOp = dyn_cast<LLVM::StoreOp>(ins);
+      auto src = moduleTranslation.lookupValue(storeOp->getOperand(0));
+      if (src == moduleTranslation.lookupValue(
+                     (loopOp.getRegion().getArguments())[0])) {
+        auto dest = moduleTranslation.lookupValue(storeOp->getOperand(1));
+        builder.CreateStore(src, dest);
+      }
+    }
+  }
+  return success();
+}
+
 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -2626,12 +2722,15 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
     return failure();
 
   builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
-  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
-  ompBuilder->applySimd(loopInfo, alignedVars,
-                        simdOp.getIfExpr()
-                            ? moduleTranslation.lookupValue(simdOp.getIfExpr())
-                            : nullptr,
-                        order, simdlen, safelen);
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos =
+      findCurrentLoopInfos(moduleTranslation);
+  for (llvm::CanonicalLoopInfo *loopInfo : loopInfos) {
+    ompBuilder->applySimd(
+        loopInfo, alignedVars,
+        simdOp.getIfExpr() ? moduleTranslation.lookupValue(simdOp.getIfExpr())
+                           : nullptr,
+        order, simdlen, safelen);
+  }
 
   return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
                             privateVarsInfo.llvmVars,
@@ -2698,16 +2797,53 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
                                                        ompLoc.DL);
       computeIP = loopInfos.front()->getPreheaderIP();
     }
+    if (auto wsloopOp = loopOp->getParentOfType<omp::WsloopOp>()) {
+      bool isInScanRegion =
+          wsloopOp.getReductionMod() && (wsloopOp.getReductionMod().value() ==
+                                         mlir::omp::ReductionModifier::inscan);
+      if (isInScanRegion) {
+        // TODO: Handle scan loops that are nested inside another loop.
+        assert(loopOp.getNumLoops() == 1);
+        llvm::Expected<SmallVector<llvm::CanonicalLoopInfo *>> loopResults =
+            ompBuilder->createCanonicalScanLoops(
+                loc, bodyGen, lowerBound, upperBound, step,
+                /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP,
+                "loop");
+
+        if (failed(handleError(loopResults, *loopOp)))
+          return failure();
+        auto inputLoop = loopResults->front();
+        auto scanLoop = loopResults->back();
+        moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
+            [&](OpenMPLoopInfoStackFrame &frame) {
+              frame.loopInfos.push_back(inputLoop);
+              frame.loopInfos.push_back(scanLoop);
+              return WalkResult::interrupt();
+            });
+        builder.restoreIP(scanLoop->getAfterIP());
+        return success();
+      } else {
+        llvm::Expected<llvm::CanonicalLoopInfo *> loopResult =
+            ompBuilder->createCanonicalLoop(
+                loc, bodyGen, lowerBound, upperBound, step,
+                /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
 
-    llvm::Expected<llvm::CanonicalLoopInfo *> loopResult =
-        ompBuilder->createCanonicalLoop(
-            loc, bodyGen, lowerBound, upperBound, step,
-            /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
+        if (failed(handleError(loopResult, *loopOp)))
+          return failure();
 
-    if (failed(handleError(loopResult, *loopOp)))
-      return failure();
+        loopInfos.push_back(*loopResult);
+      }
+    } else {
+      llvm::Expected<llvm::CanonicalLoopInfo *> loopResult =
+          ompBuilder->createCanonicalLoop(
+              loc, bodyGen, lowerBound, upperBound, step,
+              /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
+
+      if (failed(handleError(loopResult, *loopOp)))
+        return failure();
 
-    loopInfos.push_back(*loopResult);
+      loopInfos.push_back(*loopResult);
+    }
   }
 
   // Collapse loops. Store the insertion point because LoopInfos may get
@@ -2719,7 +2855,8 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
   // after applying transformations.
   moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
       [&](OpenMPLoopInfoStackFrame &frame) {
-        frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
+        frame.loopInfos.push_back(
+            ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}));
         return WalkResult::interrupt();
       });
 
@@ -4329,18 +4466,20 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
       bool loopNeedsBarrier = false;
       llvm::Value *chunk = nullptr;
 
-      llvm::CanonicalLoopInfo *loopInfo =
-          findCurrentLoopInfo(moduleTranslation);
-      llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
-          ompBuilder->applyWorkshareLoop(
-              ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
-              convertToScheduleKind(schedule), chunk, isSimd,
-              scheduleMod == omp::ScheduleModifier::monotonic,
-              scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
-              workshareLoopType);
-
-      if (!wsloopIP)
-        return wsloopIP.takeError();
+      SmallVector<llvm::CanonicalLoopInfo *> loopInfos =
+          findCurrentLoopInfos(moduleTranslation);
+      for (llvm::CanonicalLoopInfo *loopInfo : loopInfos) {
+        llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+            ompBuilder->applyWorkshareLoop(
+                ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+                convertToScheduleKind(schedule), chunk, isSimd,
+                scheduleMod == omp::ScheduleModifier::monotonic,
+                scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+                workshareLoopType);
+
+        if (!wsloopIP)
+          return wsloopIP.takeError();
+      }
     }
 
     if (failed(cleanupPrivateVars(builder, moduleTranslation,
@@ -5370,6 +5509,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
           .Case([&](omp::WsloopOp) {
             return convertOmpWsloop(*op, builder, moduleTranslation);
           })
+          .Case([&](omp::ScanOp) {
+            return convertOmpScan(*op, builder, moduleTranslation);
+          })
           .Case([&](omp::SimdOp) {
             return convertOmpSimd(*op, builder, moduleTranslation);
           })
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-scan.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-scan.mlir
new file mode 100644
index 0000000000000..a88c1993aebe1
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-scan.mlir
@@ -0,0 +1,120 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.declare_reduction @add_reduction_i32 : i32 init {
+^bb0(%arg0: i32):
+  %0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%0 : i32)
+} combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %0 = llvm.add %arg0, %arg1 : i32
+  omp.yield(%0 : i32)
+}
+// CHECK-LABEL: @scan_reduction
+llvm.func @scan_reduction() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "z"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x i32 {bindc_name = "y"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1 : i64) : i64
+  %5 = llvm.alloca %4 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(1 : i64) : i64
+  %7 = llvm.alloca %6 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr
+  %8 = llvm.mlir.constant(0 : index) : i64
+  %9 = llvm.mlir.constant(1 : index) : i64
+  %10 = llvm.mlir.constant(100 : i32) : i32
+  %11 = llvm.mlir.constant(1 : i32) : i32
+  %12 = llvm.mlir.constant(0 : i32) : i32
+  %13 = llvm.mlir.constant(100 : index) : i64
+  %14 = llvm.mlir.addressof @_QFEa : !llvm.ptr
+  %15 = llvm.mlir.addressof @_QFEb : !llvm.ptr
+  omp.parallel {
+    %37 = llvm.mlir.constant(1 : i64) : i64
+    %38 = llvm.alloca %37 x i32 {bindc_name = "k", pinned} : (i64) -> !llvm.ptr
+    %39 = llvm.mlir.constant(1 : i64) : i64
+    omp.wsloop reduction(mod: inscan, @add_reduction_i32 %5 -> %arg0 : !llvm.ptr) {
+      omp.loop_nest (%arg1) : i32 = (%11) to (%10) inclusive step (%11) {
+        llvm.store %arg1, %38 : i32, !llvm.ptr
+        %40 = llvm.load %arg0 : !llvm.ptr -> i32
+        %41 = llvm.load %38 : !llvm.ptr -> i32
+        %42 = llvm.sext %41 : i32 to i64
+        %50 = llvm.getelementptr %14[%42] : (!llvm.ptr, i64) -> !llvm.ptr, i32
+        %51 = llvm.load %50 : !llvm.ptr -> i32
+        %52 = llvm.add %40, %51 : i32
+        llvm.store %52, %arg0 : i32, !llvm.ptr
+        omp.scan inclusive(%arg0 : !llvm.ptr)
+        %53 = llvm.load %arg0 : !llvm.ptr -> i32
+        %54 = llvm.load %38 : !llvm.ptr -> i32
+        %55 = llvm.sext %54 : i32 to i64
+        %63 = llvm.getelementptr %15[%55] : (!llvm.ptr, i64) -> !llvm.ptr, i32
+        llvm.store %53, %63 : i32, !llvm.ptr
+        omp.yield
+      }
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+llvm.mlir.global internal @_QFEa() {addr_space = 0 : i32} : !llvm.array<100 x i32> {
+  %0 = llvm.mlir.zero : !llvm.array<100 x i32>
+  llvm.return %0 : !llvm.array<100 x i32>
+}
+llvm.mlir.global internal @_QFEb() {addr_space = 0 : i32} : !llvm.array<100 x i32> {
+  %0 = llvm.mlir.zero : !llvm.array<100 x i32>
+  llvm.return %0 : !llvm.array<100 x i32>
+}
+llvm.mlir.global internal constant @_QFECn() {addr_space = 0 : i32} : i32 {
+  %0 = llvm.mlir.constant(100 : i32) : i32
+  llvm.return %0 : i32
+}
+//CHECK: %[[BUFF:.+]] = alloca i32, i32 100, align 4
+//CHECK: omp_loop.preheader{{.*}}:                              ; preds = %omp.wsloop.region
+//CHECK: omp_loop.after:                                   ; preds = %omp_loop.exit
+//CHECK:   %[[LOG:.+]] = call double @llvm.log2.f64(double 1.000000e+02) #0
+//CHECK:   %[[CEIL:.+]] = call double @llvm.ceil.f64(double %[[LOG]]) #0
+//CHECK:   %[[UB:.+]] = fptoui double %[[CEIL]] to i32
+//CHECK:   br label %omp.outer.log.scan.body
+//CHECK: omp.outer.log.scan.body:                          ; preds = %omp.inner.log.scan.exit, %omp_loop.after
+//CHECK:   %[[K:.+]] = phi i32 [ 0, %omp_loop.after ], [ %[[NEXTK:.+]], %omp.inner.log.scan.exit ]
+//CHECK:   %[[I:.+]] = phi i32 [ 1, %omp_loop.after ], [ %[[NEXTI:.+]], %omp.inner.log.scan.exit ]
+//CHECK:   %[[CMP1:.+]] = icmp uge i32 99, %[[I]]
+//CHECK:   br i1 %[[CMP1]], label %omp.inner.log.scan.body, label %omp.inner.log.scan.exit
+//CHECK: omp.inner.log.scan.exit:                          ; preds = %omp.inner.log.scan.body, %omp.outer.log.scan.body
+//CHECK:   %[[NEXTK]] = add nuw i32 %[[K]], 1
+//CHECK:   %[[NEXTI]] = shl nuw i32 %[[I]], 1
+//CHECK:   %[[CMP2:.+]] = icmp ne i32 %[[NEXTK]], %[[UB]]
+//CHECK:   br i1 %[[CMP2]], label %omp.outer.log.scan.body, label %omp.outer.log.scan.exit
+//CHECK: omp.outer.log.scan.exit:                          ; preds = %omp.inner.log.scan.exit
+//CHECK:   call void @__kmpc_barrier{{.*}}
+//CHECK:   br label %omp.scan.loop.cont
+//CHECK: omp.scan.loop.cont:                               ; preds = %omp.outer.log.scan.exit
+//CHECK:   br label %omp_loop.preheader{{.*}}
+//CHECK: omp_loop.after{{.*}}:                                 ; preds = %omp_loop.exit{{.*}}
+//CHECK:  %[[ARRLAST:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 100
+//CHECK:  %[[RES:.+]] = load i32, ptr %[[ARRLAST]], align 4
+//CHECK:  store i32 %[[RES]], ptr %loadgep{{.*}}, align 4
+//CHECK: omp.inscan.dispatch{{.*}}:                            ; preds = %omp_loop.body{{.*}}
+//CHECK:   store i32 0, ptr %[[REDPRIV:.+]], align 4
+//CHECK:   %[[arrayOffset1:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 %{{.*}}
+//CHECK:   %[[BUFFVAL1:.+]] = load i32, ptr %[[arrayOffset1]], align 4
+//CHECK:   store i32 %[[BUFFVAL1]], ptr %[[REDPRIV]], align 4
+//CHECK: omp.inner.log.scan.body:                          ; preds = %omp.inner.log.scan.body, %omp.outer.log.scan.body
+//CHECK:   %[[CNT:.+]] = phi i32 [ 99, %omp.outer.log.scan.body ], [ %[[CNTNXT:.+]], %omp.inner.log.scan.body ]
+//CHECK:   %[[IND1:.+]] = add i32 %[[CNT]], 1
+//CHECK:   %[[IND1PTR:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 %[[IND1]]
+//CHECK:   %[[IND2:.+]] = sub nuw i32 %[[IND1]], %[[I]]
+//CHECK:   %[[IND2PTR:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 %[[IND2]]
+//CHECK:   %[[IND1VAL:.+]] = load i32, ptr %[[IND1PTR]], align 4
+//CHECK:   %[[IND2VAL:.+]] = load i32, ptr %[[IND2PTR]], align 4
+//CHECK:   %[[REDVAL:.+]] = add i32 %[[IND1VAL]], %[[IND2VAL]]
+//CHECK:   store i32 %[[REDVAL]], ptr %[[IND1PTR]], align 4
+//CHECK:   %[[CNTNXT]] = sub nuw i32 %[[CNT]], 1
+//CHECK:   %[[CMP3:.+]] = icmp uge i32 %[[CNTNXT]], %[[I]]
+//CHECK:   br i1 %[[CMP3]], label %omp.inner.log.scan.body, label %omp.inner.log.scan.exit
+//CHECK: omp.inscan.dispatch:                              ; preds = %omp_loop.body
+//CHECK:   store i32 0, ptr %[[REDPRIV]], align 4
+//CHECK:   br i1 true, label %omp.before.scan.bb, label %omp.after.scan.bb
+//CHECK: omp.loop_nest.region:                             ; preds = %omp.before.scan.bb
+//CHECK:   %[[ARRAYOFFSET2:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 %{{.*}}
+//CHECK:   %[[REDPRIVVAL:.+]] = load i32, ptr %[[REDPRIV]], align 4
+//CHECK:   store i32 %[[REDPRIVVAL]], ptr %[[ARRAYOFFSET2]], align 4
+//CHECK:   br label %omp.scan.loop.exit
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 7eafe396082e4..7b8e8b509d72b 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -212,37 +212,6 @@ llvm.func @simd_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
 
 // -----
 
-omp.declare_reduction @add_f32 : f32
-init {
-^bb0(%arg: f32):
-  %0 = llvm.mlir.constant(0.0 : f32) : f32
-  omp.yield (%0 : f32)
-}
-combiner {
-^bb1(%arg0: f32, %arg1: f32):
-  %1 = llvm.fadd %arg0, %arg1 : f32
-  omp.yield (%1 : f32)
-}
-atomic {
-^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
-  %2 = llvm.load %arg3 : !llvm.ptr -> f32
-  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
-  omp.yield
-}
-llvm.func @scan_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
-  // expected-error at below {{not yet implemented: Unhandled clause reduction with modifier in omp.wsloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.wsloop}}
-  omp.wsloop reduction(mod:inscan, @add_f32 %x -> %prv : !llvm.ptr) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.scan inclusive(%prv : !llvm.ptr)
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
 llvm.func @single_allocate(%x : !llvm.ptr) {
   // expected-error at below {{not yet implemented: Unhandled clause allocate in omp.single operation}}
   // expected-error at below {{LLVM Translation failed for operation: omp.single}}

>From 3072fbc83eb52cd4a94dda102005e56d739a7736 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Mon, 7 Apr 2025 01:19:56 -0500
Subject: [PATCH 2/4] new changes

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  4 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 48 +++++++++----------
 .../Frontend/OpenMPIRBuilderTest.cpp          | 13 +++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 32 ++++++++-----
 4 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0e6bdb14e1b94..37bf654f402d8 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1602,7 +1602,8 @@ class OpenMPIRBuilder {
 
   /// Creates the buffer needed for scan reduction.
   /// \param ScanVars Scan Variables.
-  void emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars);
+  void emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars,
+                                     ArrayRef<llvm::Type *> ScanVarsType);
 
   /// Copies the result back to the reduction variable.
   /// \param ReductionInfos Array type containing the ReductionOps.
@@ -2718,6 +2719,7 @@ class OpenMPIRBuilder {
   InsertPointOrErrorTy createScan(const LocationDescription &Loc,
                                   InsertPointTy AllocaIP,
                                   ArrayRef<llvm::Value *> ScanVars,
+                                  ArrayRef<llvm::Type *> ScanVarsType,
                                   bool IsInclusive);
   /// Generator for '#omp critical'
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 2f10a52538580..8fd60606df938 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4002,10 +4002,11 @@ OpenMPIRBuilder::emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
 // inclusive scans now.
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
-    ArrayRef<llvm::Value *> ScanVars, bool IsInclusive) {
+    ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
+    bool IsInclusive) {
   if (ScanInfo.OMPFirstScanLoop) {
     Builder.restoreIP(AllocaIP);
-    emitScanBasedDirectiveDeclsIR(ScanVars);
+    emitScanBasedDirectiveDeclsIR(ScanVars, ScanVarsType);
   }
   if (!updateToLocation(Loc))
     return Loc.IP;
@@ -4014,11 +4015,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
 
   if (ScanInfo.OMPFirstScanLoop) {
     // Emit buffer[i] = red; at the end of the input phase.
-    for (Value *ScanVar : ScanVars) {
-      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVar];
-      Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+    for (int i = 0; i < ScanVars.size(); i++) {
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVars[i]];
+      Type *DestTy = ScanVarsType[i];
       Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
-      Value *Src = Builder.CreateLoad(DestTy, ScanVar);
+      Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
       Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
           Val, DestTy->getPointerTo(defaultAS));
 
@@ -4031,10 +4032,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
   // Initialize the private reduction variable to 0 in each iteration.
   // It is used to copy intial values to scan buffer.
   ConstantInt *Zero = ConstantInt::get(Builder.getInt32Ty(), 0);
-  for (Value *ScanVar : ScanVars) {
-    Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+  for (int i = 0; i < ScanVars.size(); i++) {
+    Type *DestTy = ScanVarsType[i];
     Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-        ScanVar, DestTy->getPointerTo(defaultAS));
+        ScanVars[i], DestTy->getPointerTo(defaultAS));
     Builder.CreateStore(Zero, Dest);
   }
 
@@ -4042,14 +4043,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
     IV = ScanInfo.IV;
     // Emit red = buffer[i]; at the entrance to the scan phase.
     // TODO: if exclusive scan, the red = buffer[i-1] needs to be updated.
-    for (Value *ScanVar : ScanVars) {
-      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVar];
-      Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+    for (int i = 0; i < ScanVars.size(); i++) {
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVars[i]];
+      Type *DestTy = ScanVarsType[i];
       Value *SrcPtr =
           Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
       Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
       Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-          ScanVar, DestTy->getPointerTo(defaultAS));
+          ScanVars[i], DestTy->getPointerTo(defaultAS));
 
       Builder.CreateStore(Src, Dest);
     }
@@ -4070,21 +4071,17 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
 }
 
 void OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
-    ArrayRef<Value *> ScanVars) {
+    ArrayRef<Value *> ScanVars, ArrayRef<Type *> ScanVarsType) {
 
   Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
-  for (Value *ScanVar : ScanVars) {
-    llvm::Value *Buff =
-        Builder.CreateAlloca(Builder.getInt32Ty(), AllocSpan, "vla");
-    ScanInfo.ReductionVarToScanBuffs[ScanVar] = Buff;
+  for (int i = 0; i < ScanVars.size(); i++) {
+    llvm::Value *Buff = Builder.CreateAlloca(ScanVarsType[i], AllocSpan, "vla");
+    ScanInfo.ReductionVarToScanBuffs[ScanVars[i]] = Buff;
   }
 }
 
 void OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
     SmallVector<ReductionInfo> ReductionInfos) {
-  llvm::Value *OMPLast = Builder.CreateNSWAdd(
-      ScanInfo.Span,
-      llvm::ConstantInt::get(ScanInfo.Span->getType(), 1, /*isSigned=*/false));
   unsigned int DefaultAS = M.getDataLayout().getProgramAddressSpace();
   for (ReductionInfo RedInfo : ReductionInfos) {
     Value *PrivateVar = RedInfo.PrivateVariable;
@@ -4092,7 +4089,8 @@ void OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
     Value *Buff = ScanInfo.ReductionVarToScanBuffs[PrivateVar];
 
     Type *SrcTy = RedInfo.ElementType;
-    Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, OMPLast, "arrayOffset");
+    Value *Val =
+        Builder.CreateInBoundsGEP(SrcTy, Buff, ScanInfo.Span, "arrayOffset");
     Value *Src = Builder.CreateLoad(SrcTy, Val);
     Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
         OrigVar, SrcTy->getPointerTo(DefaultAS));
@@ -4120,7 +4118,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
       (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
   llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
   ConstantInt *One = ConstantInt::get(Builder.getInt32Ty(), 1);
-  llvm::Value *span = Builder.CreateAdd(spanDiff, One);
+  llvm::Value *span = ScanInfo.Span; // Builder.CreateAdd(spanDiff, One);
   llvm::Value *Arg = Builder.CreateUIToFP(span, Builder.getDoubleTy());
   llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
   F = llvm::Intrinsic::getOrInsertDeclaration(
@@ -5456,7 +5454,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
   // TODO: It would be sufficient to only sink them into body of the
   // corresponding tile loop.
   SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
-  for (int i = 0; i < NumLoops - 1; ++i) {
+  for (size_t i = 0; i < NumLoops - 1; ++i) {
     CanonicalLoopInfo *Surrounding = Loops[i];
     CanonicalLoopInfo *Nested = Loops[i + 1];
 
@@ -5469,7 +5467,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
   Builder.SetCurrentDebugLocation(DL);
   Builder.restoreIP(OutermostLoop->getPreheaderIP());
   SmallVector<Value *, 4> FloorCount, FloorRems;
-  for (int i = 0; i < NumLoops; ++i) {
+  for (size_t i = 0; i < NumLoops; ++i) {
     Value *TileSize = TileSizes[i];
     Value *OrigTripCount = OrigTripCounts[i];
     Type *IVType = OrigTripCount->getType();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 251042e030f0d..e54d9906474e7 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1440,12 +1440,14 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
 
   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
 }
-void createScan(llvm::Value *scanVar, OpenMPIRBuilder &OMPBuilder,
-                IRBuilder<> &Builder, OpenMPIRBuilder::LocationDescription Loc,
+void createScan(llvm::Value *scanVar, llvm::Type *scanType,
+                OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
+                OpenMPIRBuilder::LocationDescription Loc,
                 OpenMPIRBuilder::InsertPointTy &allocaIP) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
-  ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
-                       OMPBuilder.createScan(Loc, allocaIP, {scanVar}, true));
+  ASSERT_EXPECTED_INIT(
+      InsertPointTy, retIp,
+      OMPBuilder.createScan(Loc, allocaIP, {scanVar}, {scanType}, true));
   Builder.restoreIP(retIp);
 }
 
@@ -5363,7 +5365,8 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
     NumBodiesGenerated += 1;
     Builder.restoreIP(CodeGenIP);
-    createScan(scanVar, OMPBuilder, Builder, Loc, allocaIP);
+    createScan(scanVar, Builder.getFloatTy(), OMPBuilder, Builder, Loc,
+               allocaIP);
     return Error::success();
   };
   SmallVector<CanonicalLoopInfo *> Loops;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index bfe6faa5f1bd4..f093c986bb6f2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -47,6 +47,7 @@
 
 using namespace mlir;
 
+llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType;
 namespace {
 static llvm::omp::ScheduleKind
 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
@@ -1140,6 +1141,11 @@ initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
   // variables. Although this could be done after allocas, we don't want to mess
   // up with the alloca insertion point.
   for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+
+    llvm::Type *reductionType =
+        moduleTranslation.convertType(reductionDecls[i].getType());
+    ReductionVarToType[privateReductionVariables[i]] = reductionType;
+
     SmallVector<llvm::Value *, 1> phis;
 
     // map block argument to initializer region
@@ -1213,9 +1219,11 @@ static void collectReductionInfo(
       atomicGen = owningAtomicReductionGens[i];
     llvm::Value *variable =
         moduleTranslation.lookupValue(loop.getReductionVars()[i]);
+    llvm::Type *reductionType =
+        moduleTranslation.convertType(reductionDecls[i].getType());
+    ReductionVarToType[privateReductionVariables[i]] = reductionType;
     reductionInfos.push_back(
-        {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
-         privateReductionVariables[i],
+        {reductionType, variable, privateReductionVariables[i],
          /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
          owningReductionGens[i],
          /*ReductionGenClang=*/nullptr, atomicGen});
@@ -2603,34 +2611,34 @@ convertOmpScan(Operation &opInst, llvm::IRBuilderBase &builder,
   auto scanOp = cast<omp::ScanOp>(opInst);
   bool isInclusive = scanOp.hasInclusiveVars();
   SmallVector<llvm::Value *> llvmScanVars;
+  SmallVector<llvm::Type *> llvmScanVarsType;
   mlir::OperandRange mlirScanVars = scanOp.getInclusiveVars();
   if (!isInclusive)
     mlirScanVars = scanOp.getExclusiveVars();
   for (auto val : mlirScanVars) {
     llvm::Value *llvmVal = moduleTranslation.lookupValue(val);
-
     llvmScanVars.push_back(llvmVal);
+    llvmScanVarsType.push_back(ReductionVarToType[llvmVal]);
   }
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createScan(
-          ompLoc, allocaIP, llvmScanVars, isInclusive);
+          ompLoc, allocaIP, llvmScanVars, llvmScanVarsType, isInclusive);
   if (failed(handleError(afterIP, opInst)))
     return failure();
 
   builder.restoreIP(*afterIP);
 
   // TODO: The argument of LoopnestOp is stored into the index variable and this
-  // variable is used
-  //  across scan operation. However that makes the mlir
-  //  invalid.(`Intra-iteration dependences from a statement in the structured
-  //  block sequence that precede a scan directive to a statement in the
-  //  structured block sequence that follows a scan directive must not exist,
-  //  except for dependences for the list items specified in an inclusive or
-  //  exclusive clause.`). The argument of LoopNestOp need to be loaded again
-  //  after ScanOp again so mlir generated is valid.
+  // variable is used across the scan operation. However, that makes the mlir
+  // invalid (`Intra-iteration dependences from a statement in the structured
+  // block sequence that precede a scan directive to a statement in the
+  // structured block sequence that follows a scan directive must not exist,
+  // except for dependences for the list items specified in an inclusive or
+  // exclusive clause.`). The argument of LoopNestOp needs to be loaded again
+  // after ScanOp so that the generated mlir is valid.
   auto parentOp = scanOp->getParentOp();
   auto loopOp = cast<omp::LoopNestOp>(parentOp);
   if (loopOp) {

>From 20b2b94c1e67467b793e7131cb7fb170f5e94336 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Thu, 10 Apr 2025 01:15:42 -0500
Subject: [PATCH 3/4] Adding a few corrections

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |   2 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 205 +++++----
 .../Frontend/OpenMPIRBuilderTest.cpp          |  25 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  12 +-
 offload/out                                   | 424 ++++++++++++++++++
 5 files changed, 561 insertions(+), 107 deletions(-)
 create mode 100644 offload/out

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 37bf654f402d8..dc5721125cccd 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2715,7 +2715,7 @@ class OpenMPIRBuilder {
   /// \param ScanVars Scan Variables.
   /// \param IsInclusive Whether it is an inclusive or exclusive scan.
   ///
-  /// \returns The insertion position *after* the masked.
+  /// \returns The insertion position *after* the scan.
   InsertPointOrErrorTy createScan(const LocationDescription &Loc,
                                   InsertPointTy AllocaIP,
                                   ArrayRef<llvm::Value *> ScanVars,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8fd60606df938..394bf28bc2574 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4103,94 +4103,111 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
     const LocationDescription &Loc, InsertPointTy &FinalizeIP,
     SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
 
-  llvm::Value *spanDiff = ScanInfo.Span;
-
   if (!updateToLocation(Loc))
     return Loc.IP;
-  auto curFn = Builder.GetInsertBlock()->getParent();
-  // for (int k = 0; k <= ceil(log2(n)); ++k)
-  llvm::BasicBlock *LoopBB =
-      BasicBlock::Create(curFn->getContext(), "omp.outer.log.scan.body");
-  llvm::BasicBlock *ExitBB =
-      BasicBlock::Create(curFn->getContext(), "omp.outer.log.scan.exit");
-  llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration(
-      Builder.GetInsertBlock()->getModule(),
-      (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
-  llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
-  ConstantInt *One = ConstantInt::get(Builder.getInt32Ty(), 1);
-  llvm::Value *span = ScanInfo.Span; // Builder.CreateAdd(spanDiff, One);
-  llvm::Value *Arg = Builder.CreateUIToFP(span, Builder.getDoubleTy());
-  llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
-  F = llvm::Intrinsic::getOrInsertDeclaration(
-      Builder.GetInsertBlock()->getModule(),
-      (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
-  LogVal = emitNoUnwindRuntimeCall(F, LogVal, "");
-  LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
-  llvm::Value *NMin1 =
-      Builder.CreateNUWSub(span, llvm::ConstantInt::get(span->getType(), 1));
-  Builder.SetInsertPoint(InputBB);
-  Builder.CreateBr(LoopBB);
-  emitBlock(LoopBB, Builder.GetInsertBlock()->getParent());
-  Builder.SetInsertPoint(LoopBB);
-
-  PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
-  //// size pow2k = 1;
-  PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
-  Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
+  auto BodyGenCB = [&](InsertPointTy AllocaIP,
+                       InsertPointTy CodeGenIP) -> Error {
+    Builder.restoreIP(CodeGenIP);
+    auto CurFn = Builder.GetInsertBlock()->getParent();
+    // for (int k = 0; k <= ceil(log2(n)); ++k)
+    llvm::BasicBlock *LoopBB =
+        BasicBlock::Create(CurFn->getContext(), "omp.outer.log.scan.body");
+    llvm::BasicBlock *ExitBB =
+        splitBB(Builder, false, "omp.outer.log.scan.exit");
+    llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration(
+        Builder.GetInsertBlock()->getModule(),
+        (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
+    llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
+    llvm::Value *Arg =
+        Builder.CreateUIToFP(ScanInfo.Span, Builder.getDoubleTy());
+    llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
+    F = llvm::Intrinsic::getOrInsertDeclaration(
+        Builder.GetInsertBlock()->getModule(),
+        (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
+    LogVal = emitNoUnwindRuntimeCall(F, LogVal, "");
+    LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
+    llvm::Value *NMin1 = Builder.CreateNUWSub(
+        ScanInfo.Span, llvm::ConstantInt::get(ScanInfo.Span->getType(), 1));
+    Builder.SetInsertPoint(InputBB);
+    Builder.CreateBr(LoopBB);
+    emitBlock(LoopBB, Builder.GetInsertBlock()->getParent());
+    Builder.SetInsertPoint(LoopBB);
+
+    PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+    //// size pow2k = 1;
+    PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+    Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
+                         InputBB);
+    Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
                        InputBB);
-  Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1), InputBB);
-  //// for (size i = n - 1; i >= 2 ^ k; --i)
-  ////   tmp[i] op= tmp[i-pow2k];
-  llvm::BasicBlock *InnerLoopBB =
-      BasicBlock::Create(curFn->getContext(), "omp.inner.log.scan.body");
-  llvm::BasicBlock *InnerExitBB =
-      BasicBlock::Create(curFn->getContext(), "omp.inner.log.scan.exit");
-  llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
-  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
-  emitBlock(InnerLoopBB, Builder.GetInsertBlock()->getParent());
-  Builder.SetInsertPoint(InnerLoopBB);
-  auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
-  IVal->addIncoming(NMin1, LoopBB);
-  unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
-  for (ReductionInfo RedInfo : ReductionInfos) {
-    Value *ReductionVal = RedInfo.PrivateVariable;
-    Value *Buff = ScanInfo.ReductionVarToScanBuffs[ReductionVal];
-    Type *DestTy = RedInfo.ElementType;
-    Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
-    Value *LHSPtr = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
-    Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K);
-    Value *RHSPtr =
-        Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
-    Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
-    Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
-    Value *LHSAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
-        LHSPtr, RHS->getType()->getPointerTo(defaultAS));
-    llvm::Value *Result;
-    InsertPointOrErrorTy AfterIP =
-        RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
-    if (!AfterIP)
-      return AfterIP.takeError();
-    Builder.CreateStore(Result, LHSAddr);
-  }
-  llvm::Value *NextIVal = Builder.CreateNUWSub(
-      IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
-  IVal->addIncoming(NextIVal, Builder.GetInsertBlock());
-  CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
-  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
-  emitBlock(InnerExitBB, Builder.GetInsertBlock()->getParent());
-  llvm::Value *Next = Builder.CreateNUWAdd(
-      Counter, llvm::ConstantInt::get(Counter->getType(), 1));
-  Counter->addIncoming(Next, Builder.GetInsertBlock());
-  // pow2k <<= 1;
-  llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
-  Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock());
-  llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal);
-  Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
-  emitBlock(ExitBB, Builder.GetInsertBlock()->getParent());
-  Builder.SetInsertPoint(ExitBB);
+    //// for (size i = n - 1; i >= 2 ^ k; --i)
+    ////   tmp[i] op= tmp[i-pow2k];
+    llvm::BasicBlock *InnerLoopBB =
+        BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.body");
+    llvm::BasicBlock *InnerExitBB =
+        BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.exit");
+    llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
+    Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+    emitBlock(InnerLoopBB, Builder.GetInsertBlock()->getParent());
+    Builder.SetInsertPoint(InnerLoopBB);
+    auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+    IVal->addIncoming(NMin1, LoopBB);
+    unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
+    for (ReductionInfo RedInfo : ReductionInfos) {
+      Value *ReductionVal = RedInfo.PrivateVariable;
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ReductionVal];
+      Type *DestTy = RedInfo.ElementType;
+      Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
+      Value *LHSPtr =
+          Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+      Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K);
+      Value *RHSPtr =
+          Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
+      Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
+      Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
+      Value *LHSAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          LHSPtr, RHS->getType()->getPointerTo(defaultAS));
+      llvm::Value *Result;
+      InsertPointOrErrorTy AfterIP =
+          RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
+      if (!AfterIP)
+        return AfterIP.takeError();
+      Builder.CreateStore(Result, LHSAddr);
+    }
+    llvm::Value *NextIVal = Builder.CreateNUWSub(
+        IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
+    IVal->addIncoming(NextIVal, Builder.GetInsertBlock());
+    CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
+    Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+    emitBlock(InnerExitBB, Builder.GetInsertBlock()->getParent());
+    llvm::Value *Next = Builder.CreateNUWAdd(
+        Counter, llvm::ConstantInt::get(Counter->getType(), 1));
+    Counter->addIncoming(Next, Builder.GetInsertBlock());
+    // pow2k <<= 1;
+    llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+    Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock());
+    llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal);
+    Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+    Builder.SetInsertPoint(ExitBB->getFirstInsertionPt());
+    return Error::success();
+  };
+
+  // TODO: Perform finalization actions for variables. This has to be
+  // called for variables which have destructors/finalizers.
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+  llvm::Value *FilterVal = Builder.getInt32(0);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
-      createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
+  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
 
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
   Builder.restoreIP(FinalizeIP);
   emitScanBasedDirectiveFinalsIR(ReductionInfos);
   FinalizeIP = Builder.saveIP();
@@ -4204,7 +4221,6 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
 
   {
     // Emit loop with input phase:
-    // #pragma omp ...
     // for (i: 0..<num_iters>) {
     //   <input phase>;
     //   buffer[i] = red;
@@ -4215,6 +4231,11 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
       return Result;
   }
   {
+    // Emit loop with scan phase:
+    // for (i: 0..<num_iters>) {
+    //   red = buffer[i];
+    //   <scan phase>;
+    // }
     ScanInfo.OMPFirstScanLoop = false;
     auto Result = ScanLoopGen(Builder.saveIP());
     if (Result)
@@ -4224,17 +4245,17 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
 }
 
 void OpenMPIRBuilder::createScanBBs() {
-  auto fun = Builder.GetInsertBlock()->getParent();
+  Function *Fun = Builder.GetInsertBlock()->getParent();
   ScanInfo.OMPScanExitBlock =
-      BasicBlock::Create(fun->getContext(), "omp.exit.inscan.bb");
+      BasicBlock::Create(Fun->getContext(), "omp.exit.inscan.bb");
   ScanInfo.OMPScanDispatch =
-      BasicBlock::Create(fun->getContext(), "omp.inscan.dispatch");
+      BasicBlock::Create(Fun->getContext(), "omp.inscan.dispatch");
   ScanInfo.OMPAfterScanBlock =
-      BasicBlock::Create(fun->getContext(), "omp.after.scan.bb");
+      BasicBlock::Create(Fun->getContext(), "omp.after.scan.bb");
   ScanInfo.OMPBeforeScanBlock =
-      BasicBlock::Create(fun->getContext(), "omp.before.scan.bb");
+      BasicBlock::Create(Fun->getContext(), "omp.before.scan.bb");
   ScanInfo.OMPScanLoopExit =
-      BasicBlock::Create(fun->getContext(), "omp.scan.loop.exit");
+      BasicBlock::Create(Fun->getContext(), "omp.scan.loop.exit");
 }
 
 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
@@ -5454,7 +5475,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
   // TODO: It would be sufficient to only sink them into body of the
   // corresponding tile loop.
   SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
-  for (size_t i = 0; i < NumLoops - 1; ++i) {
+  for (int i = 0; i < NumLoops - 1; ++i) {
     CanonicalLoopInfo *Surrounding = Loops[i];
     CanonicalLoopInfo *Nested = Loops[i + 1];
 
@@ -5467,7 +5488,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
   Builder.SetCurrentDebugLocation(DL);
   Builder.restoreIP(OutermostLoop->getPreheaderIP());
   SmallVector<Value *, 4> FloorCount, FloorRems;
-  for (size_t i = 0; i < NumLoops; ++i) {
+  for (int i = 0; i < NumLoops; ++i) {
     Value *TileSize = TileSizes[i];
     Value *OrigTripCount = OrigTripCounts[i];
     Type *IVType = OrigTripCount->getType();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index e54d9906474e7..bb6c2ec5ede77 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5376,29 +5376,32 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
                            false, Builder.saveIP(), "scan"));
   Loops = loopsVec;
   EXPECT_EQ(Loops.size(), 2U);
-  auto inputLoop = Loops.front();
-  auto scanLoop = Loops.back();
-  Builder.restoreIP(scanLoop->getAfterIP());
-  inputLoop->assertOK();
-  scanLoop->assertOK();
-
-  //// Verify control flow structure (in addition to Loop->assertOK()).
-  EXPECT_EQ(inputLoop->getPreheader()->getSinglePredecessor(),
+  CanonicalLoopInfo *InputLoop = Loops.front();
+  CanonicalLoopInfo  *ScanLoop = Loops.back();
+  Builder.restoreIP(ScanLoop->getAfterIP());
+  InputLoop->assertOK();
+  ScanLoop->assertOK();
+
+  EXPECT_EQ(InputLoop->getPreheader()->getSinglePredecessor(),
             &F->getEntryBlock());
-  EXPECT_EQ(scanLoop->getAfter(), Builder.GetInsertBlock());
+  EXPECT_EQ(ScanLoop->getAfter(), Builder.GetInsertBlock());
   EXPECT_EQ(NumBodiesGenerated, 2U);
   SmallVector<OpenMPIRBuilder::ReductionInfo> reductionInfos = {
       {Builder.getFloatTy(), origVar, scanVar,
        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
        /*ReductionGenClang=*/nullptr, sumAtomicReduction}};
-  auto FinalizeIP = scanLoop->getAfterIP();
-  OpenMPIRBuilder::LocationDescription RedLoc({inputLoop->getAfterIP(), DL});
+  auto FinalizeIP = ScanLoop->getAfterIP();
+  OpenMPIRBuilder::LocationDescription RedLoc({InputLoop->getAfterIP(), DL});
   llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
   ASSERT_EXPECTED_INIT(
       InsertPointTy, retIp,
       OMPBuilder.emitScanReduction(RedLoc, FinalizeIP, reductionInfos));
   Builder.restoreIP(retIp);
   Builder.CreateBr(Cont);
+  SmallVector<CallInst *> MaskedCalls;
+  findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_masked, OMPBuilder,
+            MaskedCalls);
+  ASSERT_EQ(MaskedCalls.size(), 1u);
 }
 
 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f093c986bb6f2..c68cae77b2f3d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -47,7 +47,8 @@
 
 using namespace mlir;
 
-llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType;
+llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType; 
+llvm::OpenMPIRBuilder::InsertPointTy parallelAllocaIP;// TODO: Change this alloca IP to point to the original variable's alloca IP. ReductionDecl needs to be linked to the scan var.
 namespace {
 static llvm::omp::ScheduleKind
 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
@@ -2578,6 +2579,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
 
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
+  parallelAllocaIP = allocaIP;
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
 
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
@@ -2619,9 +2621,13 @@ convertOmpScan(Operation &opInst, llvm::IRBuilderBase &builder,
     llvm::Value *llvmVal = moduleTranslation.lookupValue(val);
     llvmScanVars.push_back(llvmVal);
     llvmScanVarsType.push_back(ReductionVarToType[llvmVal]);
+    val.getDefiningOp();
   }
-  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
-      findAllocaInsertPoint(builder, moduleTranslation);
+  auto parallelOp = scanOp->getParentOfType<omp::ParallelOp>();
+  if (!parallelOp) {
+    return failure();
+  }
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP = parallelAllocaIP;
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createScan(
diff --git a/offload/out b/offload/out
new file mode 100644
index 0000000000000..5df1a3ad98775
--- /dev/null
+++ b/offload/out
@@ -0,0 +1,424 @@
+grep: ./out: input file is also the output
+./test/offloading/fortran/target-map-nested-dtype-alloca-array.f90:!$omp target map(tofrom: one_l%nest%array_k)
+./test/offloading/fortran/target-map-nested-dtype-alloca-array.f90:!$omp end target
+./test/offloading/fortran/target-map-derived-type-full-1.f90:  !$omp target map(from:out) map(to:in)
+./test/offloading/fortran/target-map-derived-type-full-1.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: arg_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: arg_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: arg_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: local_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: local_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: local_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: map_ptr)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: map_ptr)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: map_ptr)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target data map(ompx_hold, tofrom: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target exit data map(delete: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target map(present, tofrom: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target data
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target data map(tofrom: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target exit data map(delete: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target map(present, tofrom: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target data
+./test/offloading/fortran/target-map-nullary-pointer.f90:!$omp target map(Set)
+./test/offloading/fortran/target-map-nullary-pointer.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(5))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%elements(3))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(5)%elements(3:5))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(3:5))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%num_chars%number%float_elements(8))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%num_chars%number%float_elements(5:10))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%internal_dtypes(3)%float_elements(4))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-1.f90:  !$omp target map(tofrom:scalar_arr1%break_1)
+./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-1.f90:  !$omp end target
+./test/offloading/fortran/target-map-nested-alloca-dtype-3d-alloca-array-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-nested-alloca-dtype-3d-alloca-array-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-member.f90:  !$omp target map(tofrom:scalar_arr%array_x, scalar_arr%array_y)
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-derived-type-full-2.f90:  !$omp target map(from:out) map(to:in)
+./test/offloading/fortran/target-map-derived-type-full-2.f90:  !$omp end target
+./test/offloading/fortran/target-map-double-nested-dtype-single-member.f90:!$omp target map(tofrom: top_dtype%nested%array_i2, top_dtype2%nested%array_j2)
+./test/offloading/fortran/target-map-double-nested-dtype-single-member.f90:!$omp end target
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: arg_alloc)
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: local_alloc)
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: map_ptr)
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
+./test/offloading/fortran/basic-target-parallel-region.f90:   !$omp target parallel map(from: x)
+./test/offloading/fortran/basic-target-parallel-region.f90:   !$omp end target parallel
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:   !$omp target enter data map(alloc: A)
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp end target
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target exit data map(from: A)
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target exit data map(delete: A)
+./test/offloading/fortran/target-use-dev-ptr.f90:   !$omp target data map(tofrom: x) use_device_ptr(x)
+./test/offloading/fortran/target-use-dev-ptr.f90:   !$omp end target data
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target enter data map(to:my_instance, my_instance%values)
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp end target
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target exit data map(from:my_instance%values)
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target exit data map(release:my_instance)
+./test/offloading/fortran/target_map_present_fail.f90:!$omp target data map(present,alloc:arr)
+./test/offloading/fortran/target_map_present_fail.f90:!$omp target
+./test/offloading/fortran/target_map_present_fail.f90:!$omp end target
+./test/offloading/fortran/target_map_present_fail.f90:!$omp end target data
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp declare target link(arr1) enter(arr2)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp declare target link(scalar)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(tofrom:arr1, i, j)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(i, j)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(i, j)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
+./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp target map(tofrom: array_dtype(5))
+./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp end target
+./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp target map(tofrom: array_dtype(5))
+./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-alloca-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
+./test/offloading/fortran/target-map-dtype-alloca-array-of-dtype.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-alloca-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
+./test/offloading/fortran/target-map-alloca-dtype-alloca-array-of-dtype.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-alloca-and-non-alloca-array.f90:    !$omp target map(tofrom: one_l%nest%array_i, one_l%nest%array_k)
+./test/offloading/fortran/target-map-nested-dtype-alloca-and-non-alloca-array.f90:    !$omp end target
+./test/offloading/fortran/target-map-first-common-block-member.f90:  !$omp target map(tofrom: var1)
+./test/offloading/fortran/target-map-first-common-block-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp target map(tofrom: var1)
+./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp end target
+./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp target map(tofrom: var1)
+./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp end target
+./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-2.f90:  !$omp target map(tofrom:scalar_arr2%array_x(3:6))
+./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-2.f90:  !$omp end target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target enter data map(alloc:sbuf31)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp end target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target update from(sbuf31)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target exit data map(delete:sbuf31)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target enter data map(to:p)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp end target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target update from(p)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!!$omp target exit data map(delete:p)
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target enter data map(to: x)
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target data use_device_addr(x)
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp end target data
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target map(to: x) map(from: res1, res2) &
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp & has_device_addr(first_scalar_device_addr)
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp end target
+./test/offloading/fortran/target-region-implicit-array.f90:  !$omp target
+./test/offloading/fortran/target-region-implicit-array.f90:  !$omp end target
+./test/offloading/fortran/target-map-local-intrinisc-sized-param.f90:!$omp target map(tofrom: b)
+./test/offloading/fortran/target-map-local-intrinisc-sized-param.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array-v2.f90:!$omp target map(tofrom: alloca_dtype%array_j, alloca_dtype)
+./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array-v2.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-3d-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%array_j(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-dtype-3d-alloca-array-with-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-explicit-individual-array-member.f90:  !$omp target map(tofrom:scalar_arr%array_y)
+./test/offloading/fortran/target-map-dtype-explicit-individual-array-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-target-array-section-3d-bounds.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-pointer-target-array-section-3d-bounds.f90:!$omp end target
+./test/offloading/fortran/dump_map_tables.f90:!$omp target enter data map(to:A(:N))
+./test/offloading/fortran/dump_map_tables.f90:!$omp target parallel do
+./test/offloading/fortran/dump_map_tables.f90:!$omp target exit data map(from:A)
+./test/offloading/fortran/basic-target-region-3D-array-section.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k, j2, k2)
+./test/offloading/fortran/basic-target-region-3D-array-section.f90:!$omp end target
+./test/offloading/fortran/target-map-enter-exit-array.f90:   !$omp target enter data map(alloc: A)
+./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target
+./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target exit data map(from: A)
+./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target exit data map(delete: A)
+./test/offloading/fortran/target-map-dtype-alloca-array-and-non-alloca-dtype.f90:!$omp target map(tofrom: one_l%nest, one_l%array_j)
+./test/offloading/fortran/target-map-dtype-alloca-array-and-non-alloca-dtype.f90:!$omp end target
+./test/offloading/fortran/target-map-two-dtype-explicit-member.f90:  !$omp target map(tofrom:scalar_arr1%break_1, scalar_arr2%break_3)
+./test/offloading/fortran/target-map-two-dtype-explicit-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-two-dtype-multi-member-array-1D-bounds.f90:  !$omp target map(tofrom:scalar_arr1%array_x(3:6), scalar_arr1%array_y(3:6), scalar_arr2%array_x(3:6), scalar_arr2%array_y(3:6))
+./test/offloading/fortran/target-map-two-dtype-multi-member-array-1D-bounds.f90:  !$omp end target
+./test/offloading/fortran/target-map-all-common-block-members.f90:  !$omp target map(tofrom: var1, var2, var3)
+./test/offloading/fortran/target-map-all-common-block-members.f90:  !$omp end target
+./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp target map(tofrom: var2, var1, var3)
+./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp end target
+./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp target map(tofrom: var2, var3, var1)
+./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-array-and-scalar.f90:!$omp target map(tofrom: alloca_dtype%nested_dtype%array_i, alloca_dtype%k)
+./test/offloading/fortran/target-map-alloca-dtype-array-and-scalar.f90:!$omp end target
+./test/offloading/fortran/target-map-second-common-block-member.f90:  !$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-second-common-block-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp end target
+./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp end target
+./test/offloading/fortran/double-target-call-with-declare-target.f90:    !$omp declare target link(sp)
+./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp target map(tofrom:sp) map(to: i, j)
+./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp end target
+./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp target map(tofrom:sp) map(to: i, j)
+./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-3D-member-bounds.f90:  !$omp target map(tofrom:scalar_arr%array_x(1:3, 1:3, 2:2), scalar_arr%array_y(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-3D-member-bounds.f90:  !$omp end target
+./test/offloading/fortran/target-map-dtype-allocatable-scalar-and-array.f90:    !$omp target map(tofrom: one_l%array_j, one_l%j)
+./test/offloading/fortran/target-map-dtype-allocatable-scalar-and-array.f90:    !$omp end target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:    !$omp target enter data map(to: scalar_arr%array(3:6))
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:   !$omp target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:  !$omp end target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:  !$omp target exit data map(from: scalar_arr%array(3:6))
+./test/offloading/fortran/basic-target-region-3D-array.f90:!$omp target map(tofrom:x, counter) map(to: i, j, k, i2, j2, k2)
+./test/offloading/fortran/basic-target-region-3D-array.f90:!$omp end target
+./test/offloading/fortran/basic_target_region.f90:!$omp target map(from:x)
+./test/offloading/fortran/basic_target_region.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-multi-member.f90:!$omp target map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%array_i2)
+./test/offloading/fortran/target-map-nested-dtype-multi-member.f90:!$omp end target
+./test/offloading/fortran/constant-arr-index.f90:  !$omp target map(tofrom:sp)
+./test/offloading/fortran/constant-arr-index.f90:  !$omp end target
+./test/offloading/fortran/usm_map_close.f90:  !$omp requires unified_shared_memory
+./test/offloading/fortran/usm_map_close.f90:!$omp target data map(tofrom: a, device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp target map(tofrom: device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp end target
+./test/offloading/fortran/usm_map_close.f90:!$omp end target data
+./test/offloading/fortran/usm_map_close.f90:!$omp target data map(close, tofrom: a) map(tofrom: device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp target map(tofrom: device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp end target
+./test/offloading/fortran/usm_map_close.f90:!$omp end target data
+./test/offloading/fortran/usm_map_close.f90:!$omp target data map(tofrom: a) use_device_ptr(a)
+./test/offloading/fortran/usm_map_close.f90:!$omp end target data
+./test/offloading/fortran/usm_map_close.f90:!$omp target enter data map(close, to: a)
+./test/offloading/fortran/usm_map_close.f90:!$omp target map(from: device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp end target
+./test/offloading/fortran/usm_map_close.f90:!$omp target exit data map(from: a)
+./test/offloading/fortran/target-map-two-dtype-individual-member-array-1D-bounds.f90:  !$omp target map(tofrom:scalar_arr1%array_x(3:6), scalar_arr2%array_x(3:6))
+./test/offloading/fortran/target-map-two-dtype-individual-member-array-1D-bounds.f90:  !$omp end target
+./test/offloading/fortran/target_map_present_success.f90:!$omp target data map(tofrom:arr)
+./test/offloading/fortran/target_map_present_success.f90:!$omp target data map(present,alloc:arr)
+./test/offloading/fortran/target_map_present_success.f90:!$omp target
+./test/offloading/fortran/target_map_present_success.f90:!$omp end target
+./test/offloading/fortran/target_map_present_success.f90:!$omp end target data
+./test/offloading/fortran/target_map_present_success.f90:!$omp end target data
+./test/offloading/fortran/target-map-literal-write.f90:!$omp target
+./test/offloading/fortran/target-map-literal-write.f90:!$omp end target
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp target map(tofrom: top_dtype%array_i, top_dtype%nested%nest2%array_j, top_dtype%nested%nest%array_ptr) &
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp map(tofrom: top_dtype2%array_i, top_dtype2%nested%nest2%array_j, top_dtype2%nested%nest%array_ptr)
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp end target
+./test/offloading/fortran/target-map-double-nested-dtype-double-array-bounds.f90:!$omp target map(tofrom: top_dtype%nested%array_i2(4:8), top_dtype2%nested%array_j2(4:8))
+./test/offloading/fortran/target-map-double-nested-dtype-double-array-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp target map(tofrom: top_dtype%nested%nest%j4, top_dtype%nested%nest%i4, top_dtype%nested%nest%k4) &
+./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp map(tofrom: top_dtype%array_i, top_dtype%nested%nest2%i3, top_dtype%nested%i2, top_dtype%nested%nest2%k3, top_dtype%nested%nest2%j3)
+./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp end target
+./test/offloading/fortran/target_map_common_block2.f90:  !$omp target map(tofrom:var4)
+./test/offloading/fortran/target_map_common_block2.f90:  !$omp end target
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp target data map(to: A, B) map(alloc: C)
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp target map(from: C)
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp end target
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp target update from(C) ! updates C device -> host
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp end target data
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-member-bounds.f90:  !$omp target map(tofrom:scalar_arr%array_x(3:6), scalar_arr%array_y(3:6))
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-member-bounds.f90:  !$omp end target
+./test/offloading/fortran/target-map-allocatable-array-section-1d-bounds.f90:    !$omp target map(tofrom:sp_read(2:6)) map(tofrom:sp_write(2:6))
+./test/offloading/fortran/target-map-allocatable-array-section-1d-bounds.f90:    !$omp end target
+./test/offloading/fortran/target-map-dtype-allocatable-array.f90:    !$omp target map(tofrom: one_l%array_j)
+./test/offloading/fortran/target-map-dtype-allocatable-array.f90:    !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:    !$omp target enter data map(to: array(3:6))
+./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:   !$omp target
+./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:  !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:  !$omp target exit data map(from: array(3:6))
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target data map(tofrom: b(1:3)) use_device_addr(b)
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target has_device_addr(b(2)%x)
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target data
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target data map(tofrom: b(1:3)) use_device_addr(b)
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target has_device_addr(b(2)%x)
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target data
+./test/offloading/fortran/target-depend.f90:      !$omp parallel num_threads(3)
+./test/offloading/fortran/target-depend.f90:      !$omp single
+./test/offloading/fortran/target-depend.f90:      !$omp task depend(out: z) shared(z)
+./test/offloading/fortran/target-depend.f90:      !$omp end task
+./test/offloading/fortran/target-depend.f90:      !$omp target map(tofrom: z) depend(in:z)
+./test/offloading/fortran/target-depend.f90:      !$omp end target
+./test/offloading/fortran/target-depend.f90:      !$omp end single
+./test/offloading/fortran/target-depend.f90:      !$omp end parallel
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target enter data map(to: scalar_arr%array(3:6))
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target update to(scalar_arr%array(3:6))
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:   !$omp target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp end target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target exit data map(from: scalar_arr%array(3:6))
+./test/offloading/fortran/target-map-common-block.f90:  !$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-common-block.f90:  !$omp end target
+./test/offloading/fortran/target-map-common-block.f90:!$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-common-block.f90:!$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-alloca-and-non-alloca-array.f90:!$omp target map(tofrom: one_l%array_i, one_l%array_j)
+./test/offloading/fortran/target-map-dtype-alloca-and-non-alloca-array.f90:!$omp end target
+./test/offloading/fortran/target-data-map-if-present.f90:       !$omp target data if(present(a)) map(alloc:a)
+./test/offloading/fortran/target-data-map-if-present.f90:       !$omp end target data
+./test/offloading/fortran/target-parallel-do-collapse.f90:   !$omp target parallel do map(from:array) collapse(2)
+./test/offloading/fortran/target-parallel-do-collapse.f90:    !$omp end target parallel do
+./test/offloading/fortran/target-map-nested-dtype-complex-member.f90:!$omp target map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%j2, top_dtype%nested%array_i2, top_dtype%l)
+./test/offloading/fortran/target-map-nested-dtype-complex-member.f90:!$omp end target
+./test/offloading/fortran/target-map-derived-type-full-implicit-2.f90:  !$omp target
+./test/offloading/fortran/target-map-derived-type-full-implicit-2.f90:  !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array-2.f90:    !$omp target enter data map(to: array)
+./test/offloading/fortran/target-map-enter-exit-array-2.f90:   !$omp target
+./test/offloading/fortran/target-map-enter-exit-array-2.f90:  !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array-2.f90:  !$omp target exit data map(from: array)
+./test/offloading/fortran/basic-target-parallel-do.f90:   !$omp target parallel do map(from: x)
+./test/offloading/fortran/basic-target-parallel-do.f90:   !$omp end target parallel do
+./test/offloading/fortran/target-map-dtype-multi-explicit-member.f90:    !$omp target map(from:scalar_struct%rx, scalar_struct%ry)
+./test/offloading/fortran/target-map-dtype-multi-explicit-member.f90:    !$omp end target
+./test/offloading/fortran/target-map-dynamic.f90:    !$omp target enter data map(to: A)
+./test/offloading/fortran/target-map-dynamic.f90:    !$omp target
+./test/offloading/fortran/target-map-dynamic.f90:    !$omp end target
+./test/offloading/fortran/target-map-dynamic.f90:    !$omp target exit data map(from: A)
+./test/offloading/fortran/target_map_common_block1.f90:  !$omp target map(tofrom:devices) map(tofrom:var1)
+./test/offloading/fortran/target_map_common_block1.f90:  !$omp end target
+./test/offloading/fortran/target-with-threadprivate.f90:!$omp threadprivate(pointer2)
+./test/offloading/fortran/target-with-threadprivate.f90:!$omp target
+./test/offloading/fortran/target-with-threadprivate.f90:!$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:devices)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var1)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var2)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var3)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(to:var4) map(from:tmp)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom: var6)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array.f90:!$omp target map(tofrom: alloca_dtype, alloca_dtype%array_j)
+./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array.f90:!$omp end target
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp target map(from: top_dtype%nested%nest%j4, top_dtype%nested%nest%i4, top_dtype%nested%nest%k4) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(from: top_dtype%array_i, top_dtype%nested%nest2%i3, top_dtype%nested%i2) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(from: top_dtype%nested%nest2%k3, top_dtype%nested%nest2%j3) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%nested%nest%j4, top_dtype2%nested%nest%i4, top_dtype2%nested%nest%k4) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%array_i, top_dtype2%nested%nest2%i3, top_dtype2%nested%i2) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%nested%nest2%k3, top_dtype2%nested%nest2%j3)
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp end target
+./test/offloading/fortran/target-map-derived-type-full-implicit-1.f90:  !$omp target map(from:out)
+./test/offloading/fortran/target-map-derived-type-full-implicit-1.f90:  !$omp end target
+./test/offloading/fortran/target-map-dtype-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%array_j(2:6))
+./test/offloading/fortran/target-map-dtype-alloca-array-with-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-single-member.f90:!$omp target map(tofrom: top_dtype%nested%array_i2)
+./test/offloading/fortran/target-map-nested-dtype-single-member.f90:!$omp end target
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:    !$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:    !$omp end target
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp end target
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp target map(tofrom: copy)
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp end target
+./test/offloading/fortran/target-map-nested-dtype-derived-member.f90:!$omp target map(tofrom: top_dtype%k, top_dtype%nested2%array_i2, top_dtype%nested)
+./test/offloading/fortran/target-map-nested-dtype-derived-member.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-alloca-dtype-alloca-array-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(2:6))
+./test/offloading/fortran/target-map-nested-alloca-dtype-alloca-array-bounds.f90:!$omp end target
+./test/offloading/fortran/basic-target-region-1D-array-section.f90:    !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(to:i,j)
+./test/offloading/fortran/basic-target-region-1D-array-section.f90:    !$omp end target
+./test/offloading/fortran/target-map-two-nested-dtype-member-array-map.f90:  !$omp target map(tofrom:scalar_arr1%nested%array_z(3:6), scalar_arr1%nested%array_ix(3:6), scalar_arr2%nested%array_z(3:6), scalar_arr2%nested%array_ix(3:6))
+./test/offloading/fortran/target-map-two-nested-dtype-member-array-map.f90:  !$omp end target
+./test/offloading/fortran/target-custom-mapper.f90:   !$omp declare mapper(my_mapper1 : mytype :: t) map(to: t%data(1 : n))
+./test/offloading/fortran/target-custom-mapper.f90:   !$omp declare mapper(my_mapper2 : mytype2 :: t) map(mapper(my_mapper1): t%my_data)
+./test/offloading/fortran/target-custom-mapper.f90:   !$omp target map(tofrom: sum_device) map(mapper(my_mapper2) : obj)
+./test/offloading/fortran/target-custom-mapper.f90:   !$omp end target
+./test/offloading/fortran/target-map-nested-dtype-3d-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-nested-dtype-3d-alloca-array-with-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-allocatable-array-section-3d-bounds.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-allocatable-array-section-3d-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-alloca-array-and-non-alloca-dtype.f90:    !$omp target map(tofrom: one_l%nest%nest2, one_l%nest%array_k)
+./test/offloading/fortran/target-map-nested-dtype-alloca-array-and-non-alloca-dtype.f90:    !$omp end target
+./test/offloading/fortran/target-map-individual-dtype-member-map.f90:  !$omp target map(from:scalar_struct%rx)
+./test/offloading/fortran/target-map-individual-dtype-member-map.f90:  !$omp end target
+./test/offloading/fortran/target-map-allocatable-dtype.f90:!$omp target map(tofrom: alloca_dtype)
+./test/offloading/fortran/target-map-allocatable-dtype.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(2:6))
+./test/offloading/fortran/target-map-nested-dtype-alloca-array-with-bounds.f90:!$omp end target
+./test/offloading/fortran/implicit-record-field-mapping.f90:  !$omp target map(from: dst_sum)
+./test/offloading/fortran/implicit-record-field-mapping.f90:  !$omp end target
+./test/offloading/fortran/target-has-device-addr3.f90:  !$omp target data map(tofrom: x) use_device_addr(x)
+./test/offloading/fortran/target-has-device-addr3.f90:  !$omp target has_device_addr(x) map(tofrom: y)
+./test/offloading/fortran/target-has-device-addr3.f90:  !$omp end target
+./test/offloading/fortran/target-has-device-addr3.f90:  !$omp end target data
+./test/offloading/fortran/target-map-double-nested-dtype-array-bounds.f90:!$omp target map(tofrom: top_dtype%nested%array_i2(4:8), top_dtype2%nested%array_j2(4:8))
+./test/offloading/fortran/target-map-double-nested-dtype-array-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: arg_alloc)
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: local_alloc)
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: map_ptr)
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-enter-exit-scalar.f90:    !$omp target enter data map(to: scalar)
+./test/offloading/fortran/target-map-enter-exit-scalar.f90:   !$omp target
+./test/offloading/fortran/target-map-enter-exit-scalar.f90:   !$omp end target
+./test/offloading/fortran/target-map-enter-exit-scalar.f90:  !$omp target exit data map(from: scalar)
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp target map(tofrom: top_dtype%nested%nest%i4, top_dtype%nested%array_j2) &
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp map(tofrom: top_dtype%nested%nest%k4, top_dtype%array_i, top_dtype%nested%nest2%i3) &
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp map(tofrom: top_dtype%nested%i2, top_dtype%nested%nest2%j3, top_dtype%array_j)
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp end target
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target enter data map(alloc:       &
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp  chunk%tiles(2)%field%density0)
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp end target
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target exit data map(from:         &
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp  chunk%tiles(2)%field%density0)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%test)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%test, alloca_dtype%vertexes(N2)%test)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%test_tile(N1)%field%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%test_tile(N1)%field%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom:  alloca_dtype%test_tile(N1)%field%test, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N2)%field%test, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N1)%field%vertexy, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N2)%field%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom:  alloca_dtype%test_tile(N2)%field%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N1)%vertexy, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N1)%vertexy, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(4)%vertexy, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(4)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype_arr(N2)%array_i)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/target_update.f90:!$omp target enter data map(to:x, device_id)
+./test/offloading/fortran/target_update.f90:!$omp target
+./test/offloading/fortran/target_update.f90:!$omp end target
+./test/offloading/fortran/target_update.f90:!$omp target
+./test/offloading/fortran/target_update.f90:!$omp end target
+./test/offloading/fortran/target_update.f90:!$omp target update from(x, device_id)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:    !$omp declare target link(/var_common/)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:  !$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:  !$omp end target
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: copy)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-alloca-array.f90:!$omp target map(tofrom: alloca_dtype%array_j)
+./test/offloading/fortran/target-map-alloca-dtype-alloca-array.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
+./test/offloading/fortran/target-map-alloca-dtype-array-of-dtype.f90:!$omp end target
+./test/Inputs/basic_array.f90:    !$omp declare target

>From 11afd76ba6c1c35008dc74d3d2df82504ffde088 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Fri, 11 Apr 2025 13:03:53 -0500
Subject: [PATCH 4/4] some more tests

---
 .../Frontend/OpenMPIRBuilderTest.cpp          |   2 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |   6 +-
 offload/out                                   | 424 ------------------
 3 files changed, 5 insertions(+), 427 deletions(-)
 delete mode 100644 offload/out

diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index bb6c2ec5ede77..4fb3936df653a 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5377,7 +5377,7 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
   Loops = loopsVec;
   EXPECT_EQ(Loops.size(), 2U);
   CanonicalLoopInfo *InputLoop = Loops.front();
-  CanonicalLoopInfo  *ScanLoop = Loops.back();
+  CanonicalLoopInfo *ScanLoop = Loops.back();
   Builder.restoreIP(ScanLoop->getAfterIP());
   InputLoop->assertOK();
   ScanLoop->assertOK();
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c68cae77b2f3d..03636f86215d7 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -47,8 +47,10 @@
 
 using namespace mlir;
 
-llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType; 
-llvm::OpenMPIRBuilder::InsertPointTy parallelAllocaIP;// TODO: change this alloca IP to point to originalvar allocaIP. ReductionDecl need to be linked to scan var.
+llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType;
+llvm::OpenMPIRBuilder::InsertPointTy
+    parallelAllocaIP; // TODO: change this alloca IP to point to originalvar
+                      // allocaIP. ReductionDecl need to be linked to scan var.
 namespace {
 static llvm::omp::ScheduleKind
 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
diff --git a/offload/out b/offload/out
deleted file mode 100644
index 5df1a3ad98775..0000000000000
--- a/offload/out
+++ /dev/null
@@ -1,424 +0,0 @@
-grep: ./out: input file is also the output
-./test/offloading/fortran/target-map-nested-dtype-alloca-array.f90:!$omp target map(tofrom: one_l%nest%array_k)
-./test/offloading/fortran/target-map-nested-dtype-alloca-array.f90:!$omp end target
-./test/offloading/fortran/target-map-derived-type-full-1.f90:  !$omp target map(from:out) map(to:in)
-./test/offloading/fortran/target-map-derived-type-full-1.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: arg_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: arg_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: arg_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: local_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: local_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: local_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: map_ptr)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: map_ptr)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: map_ptr)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target data map(ompx_hold, tofrom: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target exit data map(delete: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target map(present, tofrom: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target data
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target data map(tofrom: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target exit data map(delete: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target map(present, tofrom: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target data
-./test/offloading/fortran/target-map-nullary-pointer.f90:!$omp target map(Set)
-./test/offloading/fortran/target-map-nullary-pointer.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(5))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%elements(3))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(5)%elements(3:5))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(3:5))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%num_chars%number%float_elements(8))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%num_chars%number%float_elements(5:10))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%internal_dtypes(3)%float_elements(4))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-1.f90:  !$omp target map(tofrom:scalar_arr1%break_1)
-./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-1.f90:  !$omp end target
-./test/offloading/fortran/target-map-nested-alloca-dtype-3d-alloca-array-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-nested-alloca-dtype-3d-alloca-array-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-member.f90:  !$omp target map(tofrom:scalar_arr%array_x, scalar_arr%array_y)
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-derived-type-full-2.f90:  !$omp target map(from:out) map(to:in)
-./test/offloading/fortran/target-map-derived-type-full-2.f90:  !$omp end target
-./test/offloading/fortran/target-map-double-nested-dtype-single-member.f90:!$omp target map(tofrom: top_dtype%nested%array_i2, top_dtype2%nested%array_j2)
-./test/offloading/fortran/target-map-double-nested-dtype-single-member.f90:!$omp end target
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: arg_alloc)
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: local_alloc)
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: map_ptr)
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
-./test/offloading/fortran/basic-target-parallel-region.f90:   !$omp target parallel map(from: x)
-./test/offloading/fortran/basic-target-parallel-region.f90:   !$omp end target parallel
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:   !$omp target enter data map(alloc: A)
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp end target
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target exit data map(from: A)
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target exit data map(delete: A)
-./test/offloading/fortran/target-use-dev-ptr.f90:   !$omp target data map(tofrom: x) use_device_ptr(x)
-./test/offloading/fortran/target-use-dev-ptr.f90:   !$omp end target data
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target enter data map(to:my_instance, my_instance%values)
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp end target
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target exit data map(from:my_instance%values)
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target exit data map(release:my_instance)
-./test/offloading/fortran/target_map_present_fail.f90:!$omp target data map(present,alloc:arr)
-./test/offloading/fortran/target_map_present_fail.f90:!$omp target
-./test/offloading/fortran/target_map_present_fail.f90:!$omp end target
-./test/offloading/fortran/target_map_present_fail.f90:!$omp end target data
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp declare target link(arr1) enter(arr2)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp declare target link(scalar)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(tofrom:arr1, i, j)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(i, j)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(i, j)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
-./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp target map(tofrom: array_dtype(5))
-./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp end target
-./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp target map(tofrom: array_dtype(5))
-./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-alloca-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
-./test/offloading/fortran/target-map-dtype-alloca-array-of-dtype.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-alloca-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
-./test/offloading/fortran/target-map-alloca-dtype-alloca-array-of-dtype.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-alloca-and-non-alloca-array.f90:    !$omp target map(tofrom: one_l%nest%array_i, one_l%nest%array_k)
-./test/offloading/fortran/target-map-nested-dtype-alloca-and-non-alloca-array.f90:    !$omp end target
-./test/offloading/fortran/target-map-first-common-block-member.f90:  !$omp target map(tofrom: var1)
-./test/offloading/fortran/target-map-first-common-block-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp target map(tofrom: var1)
-./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp end target
-./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp target map(tofrom: var1)
-./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp end target
-./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-2.f90:  !$omp target map(tofrom:scalar_arr2%array_x(3:6))
-./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-2.f90:  !$omp end target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target enter data map(alloc:sbuf31)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp end target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target update from(sbuf31)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target exit data map(delete:sbuf31)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target enter data map(to:p)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp end target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target update from(p)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!!$omp target exit data map(delete:p)
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target enter data map(to: x)
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target data use_device_addr(x)
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp end target data
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target map(to: x) map(from: res1, res2) &
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp & has_device_addr(first_scalar_device_addr)
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp end target
-./test/offloading/fortran/target-region-implicit-array.f90:  !$omp target
-./test/offloading/fortran/target-region-implicit-array.f90:  !$omp end target
-./test/offloading/fortran/target-map-local-intrinisc-sized-param.f90:!$omp target map(tofrom: b)
-./test/offloading/fortran/target-map-local-intrinisc-sized-param.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array-v2.f90:!$omp target map(tofrom: alloca_dtype%array_j, alloca_dtype)
-./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array-v2.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-3d-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%array_j(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-dtype-3d-alloca-array-with-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-explicit-individual-array-member.f90:  !$omp target map(tofrom:scalar_arr%array_y)
-./test/offloading/fortran/target-map-dtype-explicit-individual-array-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-target-array-section-3d-bounds.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-pointer-target-array-section-3d-bounds.f90:!$omp end target
-./test/offloading/fortran/dump_map_tables.f90:!$omp target enter data map(to:A(:N))
-./test/offloading/fortran/dump_map_tables.f90:!$omp target parallel do
-./test/offloading/fortran/dump_map_tables.f90:!$omp target exit data map(from:A)
-./test/offloading/fortran/basic-target-region-3D-array-section.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k, j2, k2)
-./test/offloading/fortran/basic-target-region-3D-array-section.f90:!$omp end target
-./test/offloading/fortran/target-map-enter-exit-array.f90:   !$omp target enter data map(alloc: A)
-./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target
-./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target exit data map(from: A)
-./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target exit data map(delete: A)
-./test/offloading/fortran/target-map-dtype-alloca-array-and-non-alloca-dtype.f90:!$omp target map(tofrom: one_l%nest, one_l%array_j)
-./test/offloading/fortran/target-map-dtype-alloca-array-and-non-alloca-dtype.f90:!$omp end target
-./test/offloading/fortran/target-map-two-dtype-explicit-member.f90:  !$omp target map(tofrom:scalar_arr1%break_1, scalar_arr2%break_3)
-./test/offloading/fortran/target-map-two-dtype-explicit-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-two-dtype-multi-member-array-1D-bounds.f90:  !$omp target map(tofrom:scalar_arr1%array_x(3:6), scalar_arr1%array_y(3:6), scalar_arr2%array_x(3:6), scalar_arr2%array_y(3:6))
-./test/offloading/fortran/target-map-two-dtype-multi-member-array-1D-bounds.f90:  !$omp end target
-./test/offloading/fortran/target-map-all-common-block-members.f90:  !$omp target map(tofrom: var1, var2, var3)
-./test/offloading/fortran/target-map-all-common-block-members.f90:  !$omp end target
-./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp target map(tofrom: var2, var1, var3)
-./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp end target
-./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp target map(tofrom: var2, var3, var1)
-./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-array-and-scalar.f90:!$omp target map(tofrom: alloca_dtype%nested_dtype%array_i, alloca_dtype%k)
-./test/offloading/fortran/target-map-alloca-dtype-array-and-scalar.f90:!$omp end target
-./test/offloading/fortran/target-map-second-common-block-member.f90:  !$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-second-common-block-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp end target
-./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp end target
-./test/offloading/fortran/double-target-call-with-declare-target.f90:    !$omp declare target link(sp)
-./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp target map(tofrom:sp) map(to: i, j)
-./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp end target
-./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp target map(tofrom:sp) map(to: i, j)
-./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-3D-member-bounds.f90:  !$omp target map(tofrom:scalar_arr%array_x(1:3, 1:3, 2:2), scalar_arr%array_y(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-3D-member-bounds.f90:  !$omp end target
-./test/offloading/fortran/target-map-dtype-allocatable-scalar-and-array.f90:    !$omp target map(tofrom: one_l%array_j, one_l%j)
-./test/offloading/fortran/target-map-dtype-allocatable-scalar-and-array.f90:    !$omp end target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:    !$omp target enter data map(to: scalar_arr%array(3:6))
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:   !$omp target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:  !$omp end target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:  !$omp target exit data map(from: scalar_arr%array(3:6))
-./test/offloading/fortran/basic-target-region-3D-array.f90:!$omp target map(tofrom:x, counter) map(to: i, j, k, i2, j2, k2)
-./test/offloading/fortran/basic-target-region-3D-array.f90:!$omp end target
-./test/offloading/fortran/basic_target_region.f90:!$omp target map(from:x)
-./test/offloading/fortran/basic_target_region.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-multi-member.f90:!$omp target map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%array_i2)
-./test/offloading/fortran/target-map-nested-dtype-multi-member.f90:!$omp end target
-./test/offloading/fortran/constant-arr-index.f90:  !$omp target map(tofrom:sp)
-./test/offloading/fortran/constant-arr-index.f90:  !$omp end target
-./test/offloading/fortran/usm_map_close.f90:  !$omp requires unified_shared_memory
-./test/offloading/fortran/usm_map_close.f90:!$omp target data map(tofrom: a, device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp target map(tofrom: device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp end target
-./test/offloading/fortran/usm_map_close.f90:!$omp end target data
-./test/offloading/fortran/usm_map_close.f90:!$omp target data map(close, tofrom: a) map(tofrom: device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp target map(tofrom: device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp end target
-./test/offloading/fortran/usm_map_close.f90:!$omp end target data
-./test/offloading/fortran/usm_map_close.f90:!$omp target data map(tofrom: a) use_device_ptr(a)
-./test/offloading/fortran/usm_map_close.f90:!$omp end target data
-./test/offloading/fortran/usm_map_close.f90:!$omp target enter data map(close, to: a)
-./test/offloading/fortran/usm_map_close.f90:!$omp target map(from: device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp end target
-./test/offloading/fortran/usm_map_close.f90:!$omp target exit data map(from: a)
-./test/offloading/fortran/target-map-two-dtype-individual-member-array-1D-bounds.f90:  !$omp target map(tofrom:scalar_arr1%array_x(3:6), scalar_arr2%array_x(3:6))
-./test/offloading/fortran/target-map-two-dtype-individual-member-array-1D-bounds.f90:  !$omp end target
-./test/offloading/fortran/target_map_present_success.f90:!$omp target data map(tofrom:arr)
-./test/offloading/fortran/target_map_present_success.f90:!$omp target data map(present,alloc:arr)
-./test/offloading/fortran/target_map_present_success.f90:!$omp target
-./test/offloading/fortran/target_map_present_success.f90:!$omp end target
-./test/offloading/fortran/target_map_present_success.f90:!$omp end target data
-./test/offloading/fortran/target_map_present_success.f90:!$omp end target data
-./test/offloading/fortran/target-map-literal-write.f90:!$omp target
-./test/offloading/fortran/target-map-literal-write.f90:!$omp end target
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp target map(tofrom: top_dtype%array_i, top_dtype%nested%nest2%array_j, top_dtype%nested%nest%array_ptr) &
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp map(tofrom: top_dtype2%array_i, top_dtype2%nested%nest2%array_j, top_dtype2%nested%nest%array_ptr)
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp end target
-./test/offloading/fortran/target-map-double-nested-dtype-double-array-bounds.f90:!$omp target map(tofrom: top_dtype%nested%array_i2(4:8), top_dtype2%nested%array_j2(4:8))
-./test/offloading/fortran/target-map-double-nested-dtype-double-array-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp target map(tofrom: top_dtype%nested%nest%j4, top_dtype%nested%nest%i4, top_dtype%nested%nest%k4) &
-./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp map(tofrom: top_dtype%array_i, top_dtype%nested%nest2%i3, top_dtype%nested%i2, top_dtype%nested%nest2%k3, top_dtype%nested%nest2%j3)
-./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp end target
-./test/offloading/fortran/target_map_common_block2.f90:  !$omp target map(tofrom:var4)
-./test/offloading/fortran/target_map_common_block2.f90:  !$omp end target
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp target data map(to: A, B) map(alloc: C)
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp target map(from: C)
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp end target
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp target update from(C) ! updates C device -> host
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp end target data
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-member-bounds.f90:  !$omp target map(tofrom:scalar_arr%array_x(3:6), scalar_arr%array_y(3:6))
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-member-bounds.f90:  !$omp end target
-./test/offloading/fortran/target-map-allocatable-array-section-1d-bounds.f90:    !$omp target map(tofrom:sp_read(2:6)) map(tofrom:sp_write(2:6))
-./test/offloading/fortran/target-map-allocatable-array-section-1d-bounds.f90:    !$omp end target
-./test/offloading/fortran/target-map-dtype-allocatable-array.f90:    !$omp target map(tofrom: one_l%array_j)
-./test/offloading/fortran/target-map-dtype-allocatable-array.f90:    !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:    !$omp target enter data map(to: array(3:6))
-./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:   !$omp target
-./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:  !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:  !$omp target exit data map(from: array(3:6))
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target data map(tofrom: b(1:3)) use_device_addr(b)
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target has_device_addr(b(2)%x)
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target data
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target data map(tofrom: b(1:3)) use_device_addr(b)
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target has_device_addr(b(2)%x)
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target data
-./test/offloading/fortran/target-depend.f90:      !$omp parallel num_threads(3)
-./test/offloading/fortran/target-depend.f90:      !$omp single
-./test/offloading/fortran/target-depend.f90:      !$omp task depend(out: z) shared(z)
-./test/offloading/fortran/target-depend.f90:      !$omp end task
-./test/offloading/fortran/target-depend.f90:      !$omp target map(tofrom: z) depend(in:z)
-./test/offloading/fortran/target-depend.f90:      !$omp end target
-./test/offloading/fortran/target-depend.f90:      !$omp end single
-./test/offloading/fortran/target-depend.f90:      !$omp end parallel
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target enter data map(to: scalar_arr%array(3:6))
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target update to(scalar_arr%array(3:6))
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:   !$omp target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp end target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target exit data map(from: scalar_arr%array(3:6))
-./test/offloading/fortran/target-map-common-block.f90:  !$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-common-block.f90:  !$omp end target
-./test/offloading/fortran/target-map-common-block.f90:!$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-common-block.f90:!$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-alloca-and-non-alloca-array.f90:!$omp target map(tofrom: one_l%array_i, one_l%array_j)
-./test/offloading/fortran/target-map-dtype-alloca-and-non-alloca-array.f90:!$omp end target
-./test/offloading/fortran/target-data-map-if-present.f90:       !$omp target data if(present(a)) map(alloc:a)
-./test/offloading/fortran/target-data-map-if-present.f90:       !$omp end target data
-./test/offloading/fortran/target-parallel-do-collapse.f90:   !$omp target parallel do map(from:array) collapse(2)
-./test/offloading/fortran/target-parallel-do-collapse.f90:    !$omp end target parallel do
-./test/offloading/fortran/target-map-nested-dtype-complex-member.f90:!$omp target map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%j2, top_dtype%nested%array_i2, top_dtype%l)
-./test/offloading/fortran/target-map-nested-dtype-complex-member.f90:!$omp end target
-./test/offloading/fortran/target-map-derived-type-full-implicit-2.f90:  !$omp target
-./test/offloading/fortran/target-map-derived-type-full-implicit-2.f90:  !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array-2.f90:    !$omp target enter data map(to: array)
-./test/offloading/fortran/target-map-enter-exit-array-2.f90:   !$omp target
-./test/offloading/fortran/target-map-enter-exit-array-2.f90:  !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array-2.f90:  !$omp target exit data map(from: array)
-./test/offloading/fortran/basic-target-parallel-do.f90:   !$omp target parallel do map(from: x)
-./test/offloading/fortran/basic-target-parallel-do.f90:   !$omp end target parallel do
-./test/offloading/fortran/target-map-dtype-multi-explicit-member.f90:    !$omp target map(from:scalar_struct%rx, scalar_struct%ry)
-./test/offloading/fortran/target-map-dtype-multi-explicit-member.f90:    !$omp end target
-./test/offloading/fortran/target-map-dynamic.f90:    !$omp target enter data map(to: A)
-./test/offloading/fortran/target-map-dynamic.f90:    !$omp target
-./test/offloading/fortran/target-map-dynamic.f90:    !$omp end target
-./test/offloading/fortran/target-map-dynamic.f90:    !$omp target exit data map(from: A)
-./test/offloading/fortran/target_map_common_block1.f90:  !$omp target map(tofrom:devices) map(tofrom:var1)
-./test/offloading/fortran/target_map_common_block1.f90:  !$omp end target
-./test/offloading/fortran/target-with-threadprivate.f90:!$omp threadprivate(pointer2)
-./test/offloading/fortran/target-with-threadprivate.f90:!$omp target
-./test/offloading/fortran/target-with-threadprivate.f90:!$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:devices)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var1)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var2)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var3)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(to:var4) map(from:tmp)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom: var6)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array.f90:!$omp target map(tofrom: alloca_dtype, alloca_dtype%array_j)
-./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array.f90:!$omp end target
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp target map(from: top_dtype%nested%nest%j4, top_dtype%nested%nest%i4, top_dtype%nested%nest%k4) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(from: top_dtype%array_i, top_dtype%nested%nest2%i3, top_dtype%nested%i2) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(from: top_dtype%nested%nest2%k3, top_dtype%nested%nest2%j3) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%nested%nest%j4, top_dtype2%nested%nest%i4, top_dtype2%nested%nest%k4) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%array_i, top_dtype2%nested%nest2%i3, top_dtype2%nested%i2) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%nested%nest2%k3, top_dtype2%nested%nest2%j3)
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp end target
-./test/offloading/fortran/target-map-derived-type-full-implicit-1.f90:  !$omp target map(from:out)
-./test/offloading/fortran/target-map-derived-type-full-implicit-1.f90:  !$omp end target
-./test/offloading/fortran/target-map-dtype-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%array_j(2:6))
-./test/offloading/fortran/target-map-dtype-alloca-array-with-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-single-member.f90:!$omp target map(tofrom: top_dtype%nested%array_i2)
-./test/offloading/fortran/target-map-nested-dtype-single-member.f90:!$omp end target
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:    !$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:    !$omp end target
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp end target
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp target map(tofrom: copy)
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp end target
-./test/offloading/fortran/target-map-nested-dtype-derived-member.f90:!$omp target map(tofrom: top_dtype%k, top_dtype%nested2%array_i2, top_dtype%nested)
-./test/offloading/fortran/target-map-nested-dtype-derived-member.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-alloca-dtype-alloca-array-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(2:6))
-./test/offloading/fortran/target-map-nested-alloca-dtype-alloca-array-bounds.f90:!$omp end target
-./test/offloading/fortran/basic-target-region-1D-array-section.f90:    !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(to:i,j)
-./test/offloading/fortran/basic-target-region-1D-array-section.f90:    !$omp end target
-./test/offloading/fortran/target-map-two-nested-dtype-member-array-map.f90:  !$omp target map(tofrom:scalar_arr1%nested%array_z(3:6), scalar_arr1%nested%array_ix(3:6), scalar_arr2%nested%array_z(3:6), scalar_arr2%nested%array_ix(3:6))
-./test/offloading/fortran/target-map-two-nested-dtype-member-array-map.f90:  !$omp end target
-./test/offloading/fortran/target-custom-mapper.f90:   !$omp declare mapper(my_mapper1 : mytype :: t) map(to: t%data(1 : n))
-./test/offloading/fortran/target-custom-mapper.f90:   !$omp declare mapper(my_mapper2 : mytype2 :: t) map(mapper(my_mapper1): t%my_data)
-./test/offloading/fortran/target-custom-mapper.f90:   !$omp target map(tofrom: sum_device) map(mapper(my_mapper2) : obj)
-./test/offloading/fortran/target-custom-mapper.f90:   !$omp end target
-./test/offloading/fortran/target-map-nested-dtype-3d-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-nested-dtype-3d-alloca-array-with-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-allocatable-array-section-3d-bounds.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-allocatable-array-section-3d-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-alloca-array-and-non-alloca-dtype.f90:    !$omp target map(tofrom: one_l%nest%nest2, one_l%nest%array_k)
-./test/offloading/fortran/target-map-nested-dtype-alloca-array-and-non-alloca-dtype.f90:    !$omp end target
-./test/offloading/fortran/target-map-individual-dtype-member-map.f90:  !$omp target map(from:scalar_struct%rx)
-./test/offloading/fortran/target-map-individual-dtype-member-map.f90:  !$omp end target
-./test/offloading/fortran/target-map-allocatable-dtype.f90:!$omp target map(tofrom: alloca_dtype)
-./test/offloading/fortran/target-map-allocatable-dtype.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(2:6))
-./test/offloading/fortran/target-map-nested-dtype-alloca-array-with-bounds.f90:!$omp end target
-./test/offloading/fortran/implicit-record-field-mapping.f90:  !$omp target map(from: dst_sum)
-./test/offloading/fortran/implicit-record-field-mapping.f90:  !$omp end target
-./test/offloading/fortran/target-has-device-addr3.f90:  !$omp target data map(tofrom: x) use_device_addr(x)
-./test/offloading/fortran/target-has-device-addr3.f90:  !$omp target has_device_addr(x) map(tofrom: y)
-./test/offloading/fortran/target-has-device-addr3.f90:  !$omp end target
-./test/offloading/fortran/target-has-device-addr3.f90:  !$omp end target data
-./test/offloading/fortran/target-map-double-nested-dtype-array-bounds.f90:!$omp target map(tofrom: top_dtype%nested%array_i2(4:8), top_dtype2%nested%array_j2(4:8))
-./test/offloading/fortran/target-map-double-nested-dtype-array-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: arg_alloc)
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: local_alloc)
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: map_ptr)
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-enter-exit-scalar.f90:    !$omp target enter data map(to: scalar)
-./test/offloading/fortran/target-map-enter-exit-scalar.f90:   !$omp target
-./test/offloading/fortran/target-map-enter-exit-scalar.f90:   !$omp end target
-./test/offloading/fortran/target-map-enter-exit-scalar.f90:  !$omp target exit data map(from: scalar)
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp target map(tofrom: top_dtype%nested%nest%i4, top_dtype%nested%array_j2) &
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp map(tofrom: top_dtype%nested%nest%k4, top_dtype%array_i, top_dtype%nested%nest2%i3) &
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp map(tofrom: top_dtype%nested%i2, top_dtype%nested%nest2%j3, top_dtype%array_j)
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp end target
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target enter data map(alloc:       &
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp  chunk%tiles(2)%field%density0)
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp end target
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target exit data map(from:         &
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp  chunk%tiles(2)%field%density0)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%test)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%test, alloca_dtype%vertexes(N2)%test)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%test_tile(N1)%field%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%test_tile(N1)%field%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom:  alloca_dtype%test_tile(N1)%field%test, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N2)%field%test, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N1)%field%vertexy, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N2)%field%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom:  alloca_dtype%test_tile(N2)%field%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N1)%vertexy, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N1)%vertexy, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(4)%vertexy, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(4)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype_arr(N2)%array_i)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/target_update.f90:!$omp target enter data map(to:x, device_id)
-./test/offloading/fortran/target_update.f90:!$omp target
-./test/offloading/fortran/target_update.f90:!$omp end target
-./test/offloading/fortran/target_update.f90:!$omp target
-./test/offloading/fortran/target_update.f90:!$omp end target
-./test/offloading/fortran/target_update.f90:!$omp target update from(x, device_id)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:    !$omp declare target link(/var_common/)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:  !$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:  !$omp end target
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: copy)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-alloca-array.f90:!$omp target map(tofrom: alloca_dtype%array_j)
-./test/offloading/fortran/target-map-alloca-dtype-alloca-array.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
-./test/offloading/fortran/target-map-alloca-dtype-array-of-dtype.f90:!$omp end target
-./test/Inputs/basic_array.f90:    !$omp declare target



More information about the llvm-commits mailing list