[Mlir-commits] [llvm] [mlir] scan lowering changes (PR #133149)

Anchu Rajendran S llvmlistbot at llvm.org
Wed Apr 16 12:56:50 PDT 2025


https://github.com/anchuraj updated https://github.com/llvm/llvm-project/pull/133149

>From 0987b648f075a1fccb28dad6536495078d74b506 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Fri, 4 Apr 2025 17:01:57 -0500
Subject: [PATCH 1/6] [IRBuilder] Lowering Scan Directive

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 122 +++++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 346 +++++++++++++++++-
 .../Frontend/OpenMPIRBuilderTest.cpp          |  62 ++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 250 ++++++++++---
 .../Target/LLVMIR/openmp-reduction-scan.mlir  | 120 ++++++
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  31 --
 6 files changed, 839 insertions(+), 92 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-reduction-scan.mlir

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 6b104708bdb0d..0e6bdb14e1b94 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -503,6 +503,19 @@ class OpenMPIRBuilder {
       return allocaInst;
     }
   };
+  struct ScanInformation {
+  public:
+    llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+    llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+    llvm::BasicBlock *OMPScanExitBlock = nullptr;
+    llvm::BasicBlock *OMPScanDispatch = nullptr;
+    llvm::BasicBlock *OMPScanLoopExit = nullptr;
+    bool OMPFirstScanLoop = false;
+    llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ReductionVarToScanBuffs;
+    llvm::Value *IV;
+    llvm::Value *Span;
+  } ScanInfo;
+
   /// Initialize the internal state, this will put structures types and
   /// potentially other helpers into the underlying module. Must be called
   /// before any other method and only once! This internal state includes types
@@ -729,6 +742,35 @@ class OpenMPIRBuilder {
                       LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
                       const Twine &Name = "loop");
 
+  /// Generator for the control flow structure of an OpenMP canonical loop if
+  /// the parent directive has an `inscan` modifier specified.
+  /// If the `inscan` modifier is specified, the region of the parent is
+  /// expected to have a `scan` directive. Based on the clauses in
+  /// scan directive, the body of the loop is split into two loops: Input loop
+  /// and Scan Loop. Input loop contains the code generated for input phase of
+  /// scan and Scan loop contains the code generated for scan phase of scan.
+  ///
+  /// \param Loc       The insert and source location description.
+  /// \param BodyGenCB Callback that will generate the loop body code.
+  /// \param Start     Value of the loop counter for the first iterations.
+  /// \param Stop      Loop counter values past this will stop the loop.
+  /// \param Step      Loop counter increment after each iteration; negative
+  ///                  means counting down.
+  /// \param IsSigned  Whether Start, Stop and Step are signed integers.
+  /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+  ///                      counter.
+  /// \param ComputeIP Insertion point for instructions computing the trip
+  ///                  count. Can be used to ensure the trip count is available
+  ///                  at the outermost loop of a loop nest. If not set,
+  ///                  defaults to the preheader of the generated loop.
+  /// \param Name      Base name used to derive BB and instruction names.
+  ///
+  /// \returns A vector containing Loop Info of Input Loop and Scan Loop.
+  Expected<SmallVector<llvm::CanonicalLoopInfo *>> createCanonicalScanLoops(
+      const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+      Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+      InsertPointTy ComputeIP, const Twine &Name);
+
   /// Calculate the trip count of a canonical loop.
   ///
   /// This allows specifying user-defined loop counter values using increment,
@@ -798,13 +840,16 @@ class OpenMPIRBuilder {
   ///                  at the outermost loop of a loop nest. If not set,
   ///                  defaults to the preheader of the generated loop.
   /// \param Name      Base name used to derive BB and instruction names.
+  /// \param InScan    Whether loop has a scan reduction specified.
   ///
   /// \returns An object representing the created control flow structure which
   ///          can be used for loop-associated directives.
-  Expected<CanonicalLoopInfo *> createCanonicalLoop(
-      const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
-      Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
-      InsertPointTy ComputeIP = {}, const Twine &Name = "loop");
+  Expected<CanonicalLoopInfo *>
+  createCanonicalLoop(const LocationDescription &Loc,
+                      LoopBodyGenCallbackTy BodyGenCB, Value *Start,
+                      Value *Stop, Value *Step, bool IsSigned,
+                      bool InclusiveStop, InsertPointTy ComputeIP = {},
+                      const Twine &Name = "loop", bool InScan = false);
 
   /// Collapse a loop nest into a single loop.
   ///
@@ -1532,6 +1577,38 @@ class OpenMPIRBuilder {
       ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
       Function *ReduceFn, AttributeList FuncAttrs);
 
+  /// Creates the runtime call specified
+  /// \param Callee Function Declaration Value
+  /// \param Args Arguments passed to the call
+  /// \param Name Optional param to specify the name of the call Instruction.
+  ///
+  /// \return The Runtime call instruction created.
+  llvm::CallInst *emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
+                                          ArrayRef<llvm::Value *> Args,
+                                          const llvm::Twine &Name);
+
+  /// Helper function for createCanonicalScanLoops to create the input loop
+  /// in the first generator and the scan loop in the second generator.
+  /// \param InputLoopGen Callback for generating the loop for input phase
+  /// \param ScanLoopGen Callback for generating the loop for scan phase
+  ///
+  /// \return error if any produced, else return success.
+  Error emitScanBasedDirectiveIR(
+      llvm::function_ref<Error()> InputLoopGen,
+      llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen);
+
+  /// Creates the basic blocks required for scan reduction.
+  void createScanBBs();
+
+  /// Creates the buffer needed for scan reduction.
+  /// \param ScanVars Scan Variables.
+  void emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars);
+
+  /// Copies the result back to the reduction variable.
+  /// \param ReductionInfos Array type containing the ReductionOps.
+  void emitScanBasedDirectiveFinalsIR(
+      SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+
   /// This function emits a helper that gathers Reduce lists from the first
   /// lane of every active warp to lanes in the first warp.
   ///
@@ -2179,7 +2256,6 @@ class OpenMPIRBuilder {
   // block, if possible, or else at the end of the function. Also add a branch
   // from current block to BB if current block does not have a terminator.
   void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
-
   /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
   /// Here is the logic:
   /// if (Cond) {
@@ -2607,6 +2683,42 @@ class OpenMPIRBuilder {
                                     BodyGenCallbackTy BodyGenCB,
                                     FinalizeCallbackTy FiniCB, Value *Filter);
 
+  /// This function performs the scan reduction of the values updated in
+  /// the input phase. The reduction logic needs to be emitted between input
+  /// and scan loop returned by `createCanonicalScanLoops`. The following
+  /// is the code that is generated, `buffer` and `span` are expected to be
+  /// populated before executing the generated code.
+  ///
+  ///  for (int k = 0; k != ceil(log2(span)); ++k) {
+  ///    i=pow(2,k)
+  ///    for (size cnt = last_iter; cnt >= i; --cnt)
+  ///      buffer[cnt] op= buffer[cnt-i];
+  ///  }
+  /// \param Loc The insert and source location description.
+  /// \param FinalizeIP The IP where the reduction result needs
+  ///                  to be copied back to original variable.
+  /// \param ReductionInfos Array type containing the ReductionOps.
+  ///
+  /// \returns The insertion position *after* the scan reduction.
+  InsertPointOrErrorTy emitScanReduction(
+      const LocationDescription &Loc, InsertPointTy &FinalizeIP,
+      SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+
+  /// This directive splits and directs the control flow to input phase
+  ///  blocks or scan phase blocks based on 1. whether input loop or scan loop
+  ///  is executed, 2. whether exclusive or inclusive scan is used.
+  ///
+  /// \param Loc The insert and source location description.
+  /// \param AllocaIP The IP where the temporary buffer for scan reduction
+  ///                 needs to be allocated.
+  /// \param ScanVars Scan Variables.
+  /// \param IsInclusive Whether it is an inclusive or exclusive scan.
+  ///
+  /// \returns The insertion position *after* the scan.
+  InsertPointOrErrorTy createScan(const LocationDescription &Loc,
+                                  InsertPointTy AllocaIP,
+                                  ArrayRef<llvm::Value *> ScanVars,
+                                  bool IsInclusive);
   /// Generator for '#omp critical'
   ///
   /// \param Loc The insert and source location description.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 28662efc02882..2f10a52538580 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -59,6 +59,7 @@
 #include "llvm/Transforms/Utils/LoopPeel.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 
+#include <cassert>
 #include <cstdint>
 #include <optional>
 
@@ -3981,6 +3982,263 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
                               /*Conditional*/ true, /*hasFinalize*/ true);
 }
 
+llvm::CallInst *
+OpenMPIRBuilder::emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
+                                         ArrayRef<llvm::Value *> Args,
+                                         const llvm::Twine &Name) {
+  llvm::CallInst *Call = Builder.CreateCall(
+      Callee, Args, SmallVector<llvm::OperandBundleDef, 1>(), Name);
+  Call->setDoesNotThrow();
+  return Call;
+}
+
+// Expects the input basic block to be dominated by BeforeScanBB.
+// Once Scan directive is encountered, the code after scan directive should be
+// dominated by AfterScanBB. Scan directive splits the code sequence to
+// scan and input phase. Based on whether inclusive or exclusive
+// clause is used in the scan directive and whether input loop or scan loop
+// is lowered, it adds jumps to input and scan phase. First Scan loop is the
+// input loop and second is the scan loop. The code generated handles only
+// inclusive scans now.
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
+    const LocationDescription &Loc, InsertPointTy AllocaIP,
+    ArrayRef<llvm::Value *> ScanVars, bool IsInclusive) {
+  if (ScanInfo.OMPFirstScanLoop) {
+    Builder.restoreIP(AllocaIP);
+    emitScanBasedDirectiveDeclsIR(ScanVars);
+  }
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+  unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
+  llvm::Value *IV = ScanInfo.IV;
+
+  if (ScanInfo.OMPFirstScanLoop) {
+    // Emit buffer[i] = red; at the end of the input phase.
+    for (Value *ScanVar : ScanVars) {
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVar];
+      Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+      Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+      Value *Src = Builder.CreateLoad(DestTy, ScanVar);
+      Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          Val, DestTy->getPointerTo(defaultAS));
+
+      Builder.CreateStore(Src, Dest);
+    }
+  }
+  Builder.CreateBr(ScanInfo.OMPScanLoopExit);
+  emitBlock(ScanInfo.OMPScanDispatch, Builder.GetInsertBlock()->getParent());
+
+  // Initialize the private reduction variable to 0 in each iteration.
+  // It is used to copy initial values to the scan buffer.
+  ConstantInt *Zero = ConstantInt::get(Builder.getInt32Ty(), 0);
+  for (Value *ScanVar : ScanVars) {
+    Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+    Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+        ScanVar, DestTy->getPointerTo(defaultAS));
+    Builder.CreateStore(Zero, Dest);
+  }
+
+  if (!ScanInfo.OMPFirstScanLoop) {
+    IV = ScanInfo.IV;
+    // Emit red = buffer[i]; at the entrance to the scan phase.
+    // TODO: if exclusive scan, the red = buffer[i-1] needs to be updated.
+    for (Value *ScanVar : ScanVars) {
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVar];
+      Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+      Value *SrcPtr =
+          Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+      Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
+      Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          ScanVar, DestTy->getPointerTo(defaultAS));
+
+      Builder.CreateStore(Src, Dest);
+    }
+  }
+
+  // TODO: Update it to CreateBr and remove dead blocks
+  llvm::Value *CmpI = Builder.getInt1(true);
+  if (ScanInfo.OMPFirstScanLoop == IsInclusive) {
+    Builder.CreateCondBr(CmpI, ScanInfo.OMPBeforeScanBlock,
+                         ScanInfo.OMPAfterScanBlock);
+  } else {
+    Builder.CreateCondBr(CmpI, ScanInfo.OMPAfterScanBlock,
+                         ScanInfo.OMPBeforeScanBlock);
+  }
+  emitBlock(ScanInfo.OMPAfterScanBlock, Builder.GetInsertBlock()->getParent());
+  Builder.SetInsertPoint(ScanInfo.OMPAfterScanBlock);
+  return Builder.saveIP();
+}
+
+void OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
+    ArrayRef<Value *> ScanVars) {
+
+  Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
+  for (Value *ScanVar : ScanVars) {
+    llvm::Value *Buff =
+        Builder.CreateAlloca(Builder.getInt32Ty(), AllocSpan, "vla");
+    ScanInfo.ReductionVarToScanBuffs[ScanVar] = Buff;
+  }
+}
+
+void OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
+    SmallVector<ReductionInfo> ReductionInfos) {
+  llvm::Value *OMPLast = Builder.CreateNSWAdd(
+      ScanInfo.Span,
+      llvm::ConstantInt::get(ScanInfo.Span->getType(), 1, /*isSigned=*/false));
+  unsigned int DefaultAS = M.getDataLayout().getProgramAddressSpace();
+  for (ReductionInfo RedInfo : ReductionInfos) {
+    Value *PrivateVar = RedInfo.PrivateVariable;
+    Value *OrigVar = RedInfo.Variable;
+    Value *Buff = ScanInfo.ReductionVarToScanBuffs[PrivateVar];
+
+    Type *SrcTy = RedInfo.ElementType;
+    Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, OMPLast, "arrayOffset");
+    Value *Src = Builder.CreateLoad(SrcTy, Val);
+    Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+        OrigVar, SrcTy->getPointerTo(DefaultAS));
+
+    Builder.CreateStore(Src, Dest);
+  }
+}
+
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
+    const LocationDescription &Loc, InsertPointTy &FinalizeIP,
+    SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
+
+  llvm::Value *spanDiff = ScanInfo.Span;
+
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+  auto curFn = Builder.GetInsertBlock()->getParent();
+  // for (int k = 0; k <= ceil(log2(n)); ++k)
+  llvm::BasicBlock *LoopBB =
+      BasicBlock::Create(curFn->getContext(), "omp.outer.log.scan.body");
+  llvm::BasicBlock *ExitBB =
+      BasicBlock::Create(curFn->getContext(), "omp.outer.log.scan.exit");
+  llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration(
+      Builder.GetInsertBlock()->getModule(),
+      (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
+  llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
+  ConstantInt *One = ConstantInt::get(Builder.getInt32Ty(), 1);
+  llvm::Value *span = Builder.CreateAdd(spanDiff, One);
+  llvm::Value *Arg = Builder.CreateUIToFP(span, Builder.getDoubleTy());
+  llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
+  F = llvm::Intrinsic::getOrInsertDeclaration(
+      Builder.GetInsertBlock()->getModule(),
+      (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
+  LogVal = emitNoUnwindRuntimeCall(F, LogVal, "");
+  LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
+  llvm::Value *NMin1 =
+      Builder.CreateNUWSub(span, llvm::ConstantInt::get(span->getType(), 1));
+  Builder.SetInsertPoint(InputBB);
+  Builder.CreateBr(LoopBB);
+  emitBlock(LoopBB, Builder.GetInsertBlock()->getParent());
+  Builder.SetInsertPoint(LoopBB);
+
+  PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+  //// size pow2k = 1;
+  PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+  Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
+                       InputBB);
+  Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1), InputBB);
+  //// for (size i = n - 1; i >= 2 ^ k; --i)
+  ////   tmp[i] op= tmp[i-pow2k];
+  llvm::BasicBlock *InnerLoopBB =
+      BasicBlock::Create(curFn->getContext(), "omp.inner.log.scan.body");
+  llvm::BasicBlock *InnerExitBB =
+      BasicBlock::Create(curFn->getContext(), "omp.inner.log.scan.exit");
+  llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
+  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+  emitBlock(InnerLoopBB, Builder.GetInsertBlock()->getParent());
+  Builder.SetInsertPoint(InnerLoopBB);
+  auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+  IVal->addIncoming(NMin1, LoopBB);
+  unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
+  for (ReductionInfo RedInfo : ReductionInfos) {
+    Value *ReductionVal = RedInfo.PrivateVariable;
+    Value *Buff = ScanInfo.ReductionVarToScanBuffs[ReductionVal];
+    Type *DestTy = RedInfo.ElementType;
+    Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
+    Value *LHSPtr = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+    Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K);
+    Value *RHSPtr =
+        Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
+    Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
+    Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
+    Value *LHSAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+        LHSPtr, RHS->getType()->getPointerTo(defaultAS));
+    llvm::Value *Result;
+    InsertPointOrErrorTy AfterIP =
+        RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
+    if (!AfterIP)
+      return AfterIP.takeError();
+    Builder.CreateStore(Result, LHSAddr);
+  }
+  llvm::Value *NextIVal = Builder.CreateNUWSub(
+      IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
+  IVal->addIncoming(NextIVal, Builder.GetInsertBlock());
+  CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
+  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+  emitBlock(InnerExitBB, Builder.GetInsertBlock()->getParent());
+  llvm::Value *Next = Builder.CreateNUWAdd(
+      Counter, llvm::ConstantInt::get(Counter->getType(), 1));
+  Counter->addIncoming(Next, Builder.GetInsertBlock());
+  // pow2k <<= 1;
+  llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+  Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock());
+  llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal);
+  Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+  emitBlock(ExitBB, Builder.GetInsertBlock()->getParent());
+  Builder.SetInsertPoint(ExitBB);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+
+  Builder.restoreIP(FinalizeIP);
+  emitScanBasedDirectiveFinalsIR(ReductionInfos);
+  FinalizeIP = Builder.saveIP();
+
+  return AfterIP;
+}
+
+Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
+    llvm::function_ref<Error()> InputLoopGen,
+    llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen) {
+
+  {
+    // Emit loop with input phase:
+    // #pragma omp ...
+    // for (i: 0..<num_iters>) {
+    //   <input phase>;
+    //   buffer[i] = red;
+    // }
+    ScanInfo.OMPFirstScanLoop = true;
+    auto Result = InputLoopGen();
+    if (Result)
+      return Result;
+  }
+  {
+    ScanInfo.OMPFirstScanLoop = false;
+    auto Result = ScanLoopGen(Builder.saveIP());
+    if (Result)
+      return Result;
+  }
+  return Error::success();
+}
+
+void OpenMPIRBuilder::createScanBBs() {
+  auto fun = Builder.GetInsertBlock()->getParent();
+  ScanInfo.OMPScanExitBlock =
+      BasicBlock::Create(fun->getContext(), "omp.exit.inscan.bb");
+  ScanInfo.OMPScanDispatch =
+      BasicBlock::Create(fun->getContext(), "omp.inscan.dispatch");
+  ScanInfo.OMPAfterScanBlock =
+      BasicBlock::Create(fun->getContext(), "omp.after.scan.bb");
+  ScanInfo.OMPBeforeScanBlock =
+      BasicBlock::Create(fun->getContext(), "omp.before.scan.bb");
+  ScanInfo.OMPScanLoopExit =
+      BasicBlock::Create(fun->getContext(), "omp.scan.loop.exit");
+}
+
 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
     DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
     BasicBlock *PostInsertBefore, const Twine &Name) {
@@ -4078,10 +4336,91 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
   return CL;
 }
 
+Expected<SmallVector<llvm::CanonicalLoopInfo *>>
+OpenMPIRBuilder::createCanonicalScanLoops(
+    const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+    InsertPointTy ComputeIP, const Twine &Name) {
+  LocationDescription ComputeLoc =
+      ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
+  updateToLocation(ComputeLoc);
+
+  Value *TripCount = calculateCanonicalLoopTripCount(
+      ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
+  ScanInfo.Span = TripCount;
+
+  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
+    /// The control of the loopbody of following structure:
+    ///
+    ///     InputBlock
+    ///        |
+    ///     ContinueBlock
+    ///
+    ///  is transformed to:
+    ///
+    ///     InputBlock
+    ///        |
+    ///     OMPScanDispatch
+    ///
+    ///     OMPBeforeScanBlock
+    ///        |
+    ///     OMPScanLoopExit
+    ///        |
+    ///     ContinueBlock
+    ///
+    /// OMPBeforeScanBlock dominates the control flow of code generated until
+    /// scan directive is encountered and OMPAfterScanBlock dominates the
+    /// control flow of code generated after scan is encountered. The successor
+    /// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
+    /// on 1. whether it is in input phase or scan phase, 2. whether it is an
+    /// exclusive or inclusive scan.
+    ScanInfo.IV = IV;
+    createScanBBs();
+    BasicBlock *InputBlock = Builder.GetInsertBlock();
+    Instruction *Terminator = InputBlock->getTerminator();
+    assert(Terminator->getNumSuccessors() == 1);
+    BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
+    Terminator->setSuccessor(0, ScanInfo.OMPScanDispatch);
+    emitBlock(ScanInfo.OMPBeforeScanBlock,
+              Builder.GetInsertBlock()->getParent());
+    Builder.CreateBr(ScanInfo.OMPScanLoopExit);
+    emitBlock(ScanInfo.OMPScanLoopExit, Builder.GetInsertBlock()->getParent());
+    Builder.CreateBr(ContinueBlock);
+    Builder.SetInsertPoint(ScanInfo.OMPBeforeScanBlock->getFirstInsertionPt());
+    return BodyGenCB(Builder.saveIP(), IV);
+  };
+
+  SmallVector<llvm::CanonicalLoopInfo *> Result;
+  const auto &&InputLoopGen = [&]() -> Error {
+    auto LoopInfo =
+        createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
+                            InclusiveStop, ComputeIP, Name, true);
+    if (!LoopInfo)
+      return LoopInfo.takeError();
+    Result.push_back(*LoopInfo);
+    Builder.restoreIP((*LoopInfo)->getAfterIP());
+    return Error::success();
+  };
+  const auto &&ScanLoopGen = [&](LocationDescription Loc) -> Error {
+    auto LoopInfo =
+        createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
+                            InclusiveStop, ComputeIP, Name, true);
+    if (!LoopInfo)
+      return LoopInfo.takeError();
+    Result.push_back(*LoopInfo);
+    Builder.restoreIP((*LoopInfo)->getAfterIP());
+    return Error::success();
+  };
+  Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen);
+  if (Err) {
+    return Err;
+  }
+  return Result;
+}
+
 Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
     const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
     bool IsSigned, bool InclusiveStop, const Twine &Name) {
-
   // Consider the following difficulties (assuming 8-bit signed integers):
   //  * Adding \p Step to the loop counter which passes \p Stop may overflow:
   //      DO I = 1, 100, 50
@@ -4141,7 +4480,7 @@ Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
 Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
     const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
     Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
-    InsertPointTy ComputeIP, const Twine &Name) {
+    InsertPointTy ComputeIP, const Twine &Name, bool InScan) {
   LocationDescription ComputeLoc =
       ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
 
@@ -4152,6 +4491,9 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
     Builder.restoreIP(CodeGenIP);
     Value *Span = Builder.CreateMul(IV, Step);
     Value *IndVar = Builder.CreateAdd(Span, Start);
+    if (InScan) {
+      ScanInfo.IV = IndVar;
+    }
     return BodyGenCB(Builder.saveIP(), IndVar);
   };
   LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 2d3d318be7ff1..251042e030f0d 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1440,6 +1440,14 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
 
   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
 }
+void createScan(llvm::Value *scanVar, OpenMPIRBuilder &OMPBuilder,
+                IRBuilder<> &Builder, OpenMPIRBuilder::LocationDescription Loc,
+                OpenMPIRBuilder::InsertPointTy &allocaIP) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
+                       OMPBuilder.createScan(Loc, allocaIP, {scanVar}, true));
+  Builder.restoreIP(retIp);
+}
 
 TEST_F(OpenMPIRBuilderTest, CanonicalLoopTripCount) {
   OpenMPIRBuilder OMPBuilder(*M);
@@ -5336,6 +5344,60 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
   EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
 }
 
+TEST_F(OpenMPIRBuilderTest, ScanReduction) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+  Value *TripCount = F->getArg(0);
+  Type *LCTy = TripCount->getType();
+  Value *StartVal = ConstantInt::get(LCTy, 1);
+  Value *StopVal = ConstantInt::get(LCTy, 100);
+  Value *Step = ConstantInt::get(LCTy, 1);
+  auto allocaIP = Builder.saveIP();
+
+  llvm::Value *scanVar = Builder.CreateAlloca(Builder.getFloatTy());
+  llvm::Value *origVar = Builder.CreateAlloca(Builder.getFloatTy());
+  unsigned NumBodiesGenerated = 0;
+  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
+    NumBodiesGenerated += 1;
+    Builder.restoreIP(CodeGenIP);
+    createScan(scanVar, OMPBuilder, Builder, Loc, allocaIP);
+    return Error::success();
+  };
+  SmallVector<CanonicalLoopInfo *> Loops;
+  ASSERT_EXPECTED_INIT(SmallVector<CanonicalLoopInfo *>, loopsVec,
+                       OMPBuilder.createCanonicalScanLoops(
+                           Loc, LoopBodyGenCB, StartVal, StopVal, Step, false,
+                           false, Builder.saveIP(), "scan"));
+  Loops = loopsVec;
+  EXPECT_EQ(Loops.size(), 2U);
+  auto inputLoop = Loops.front();
+  auto scanLoop = Loops.back();
+  Builder.restoreIP(scanLoop->getAfterIP());
+  inputLoop->assertOK();
+  scanLoop->assertOK();
+
+  //// Verify control flow structure (in addition to Loop->assertOK()).
+  EXPECT_EQ(inputLoop->getPreheader()->getSinglePredecessor(),
+            &F->getEntryBlock());
+  EXPECT_EQ(scanLoop->getAfter(), Builder.GetInsertBlock());
+  EXPECT_EQ(NumBodiesGenerated, 2U);
+  SmallVector<OpenMPIRBuilder::ReductionInfo> reductionInfos = {
+      {Builder.getFloatTy(), origVar, scanVar,
+       /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
+       /*ReductionGenClang=*/nullptr, sumAtomicReduction}};
+  auto FinalizeIP = scanLoop->getAfterIP();
+  OpenMPIRBuilder::LocationDescription RedLoc({inputLoop->getAfterIP(), DL});
+  llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
+  ASSERT_EXPECTED_INIT(
+      InsertPointTy, retIp,
+      OMPBuilder.emitScanReduction(RedLoc, FinalizeIP, reductionInfos));
+  Builder.restoreIP(retIp);
+  Builder.CreateBr(Cont);
+}
+
 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8d1cc9b10a950..bfe6faa5f1bd4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -86,7 +86,9 @@ class OpenMPLoopInfoStackFrame
     : public LLVM::ModuleTranslation::StackFrameBase<OpenMPLoopInfoStackFrame> {
 public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPLoopInfoStackFrame)
-  llvm::CanonicalLoopInfo *loopInfo = nullptr;
+  // For constructs like scan, one Loop info frame can contain multiple
+  // Canonical Loops
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
 };
 
 /// Custom error class to signal translation errors that don't need reporting,
@@ -169,6 +171,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDistScheduleChunkSize())
       result = todo("dist_schedule with chunk_size");
   };
+  auto checkExclusive = [&todo](auto op, LogicalResult &result) {
+    if (!op.getExclusiveVars().empty())
+      result = todo("exclusive");
+  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -232,8 +238,8 @@ static LogicalResult checkImplementationStatus(Operation &op) {
           op.getReductionSyms())
         result = todo("reduction");
     if (op.getReductionMod() &&
-        op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
-      result = todo("reduction with modifier");
+        op.getReductionMod().value() == omp::ReductionModifier::task)
+      result = todo("reduction with task modifier");
   };
   auto checkTaskReduction = [&todo](auto op, LogicalResult &result) {
     if (!op.getTaskReductionVars().empty() || op.getTaskReductionByref() ||
@@ -253,6 +259,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkOrder(op, result);
       })
       .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
+      .Case([&](omp::ScanOp op) { checkExclusive(op, result); })
       .Case([&](omp::SectionsOp op) {
         checkAllocate(op, result);
         checkPrivate(op, result);
@@ -382,15 +389,15 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
 /// Find the loop information structure for the loop nest being translated. It
 /// will return a `null` value unless called from the translation function for
 /// a loop wrapper operation after successfully translating its body.
-static llvm::CanonicalLoopInfo *
-findCurrentLoopInfo(LLVM::ModuleTranslation &moduleTranslation) {
-  llvm::CanonicalLoopInfo *loopInfo = nullptr;
+static SmallVector<llvm::CanonicalLoopInfo *>
+findCurrentLoopInfos(LLVM::ModuleTranslation &moduleTranslation) {
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
   moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
       [&](OpenMPLoopInfoStackFrame &frame) {
-        loopInfo = frame.loopInfo;
+        loopInfos = frame.loopInfos;
         return WalkResult::interrupt();
       });
-  return loopInfo;
+  return loopInfos;
 }
 
 /// Converts the given region that appears within an OpenMP dialect operation to
@@ -2342,27 +2349,62 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   if (failed(handleError(regionBlock, opInst)))
     return failure();
 
-  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
-  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
-
-  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
-      ompBuilder->applyWorkshareLoop(
-          ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
-          convertToScheduleKind(schedule), chunk, isSimd,
-          scheduleMod == omp::ScheduleModifier::monotonic,
-          scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
-          workshareLoopType);
-
-  if (failed(handleError(wsloopIP, opInst)))
-    return failure();
-
-  // Process the reductions if required.
-  if (failed(createReductionsAndCleanup(
-          wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
-          privateReductionVariables, isByRef, wsloopOp.getNowait(),
-          /*isTeamsReduction=*/false)))
-    return failure();
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos =
+      findCurrentLoopInfos(moduleTranslation);
+  auto inputLoopFinishIp = loopInfos.front()->getAfterIP();
+  auto scanLoopFinishIp = loopInfos.back()->getAfterIP();
+  bool isInScanRegion =
+      wsloopOp.getReductionMod() && (wsloopOp.getReductionMod().value() ==
+                                     mlir::omp::ReductionModifier::inscan);
+  if (isInScanRegion) {
+    builder.restoreIP(inputLoopFinishIp);
+    SmallVector<OwningReductionGen> owningReductionGens;
+    SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;
+    SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;
+    collectReductionInfo(wsloopOp, builder, moduleTranslation, reductionDecls,
+                         owningReductionGens, owningAtomicReductionGens,
+                         privateReductionVariables, reductionInfos);
+    llvm::BasicBlock *cont = splitBB(builder, false, "omp.scan.loop.cont");
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy redIP =
+        ompBuilder->emitScanReduction(builder.saveIP(), scanLoopFinishIp,
+                                      reductionInfos);
+    if (failed(handleError(redIP, opInst)))
+      return failure();
 
+    builder.restoreIP(*redIP);
+    builder.CreateBr(cont);
+  }
+  for (llvm::CanonicalLoopInfo *loopInfo : loopInfos) {
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+        ompBuilder->applyWorkshareLoop(
+            ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+            convertToScheduleKind(schedule), chunk, isSimd,
+            scheduleMod == omp::ScheduleModifier::monotonic,
+            scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+            workshareLoopType);
+
+    if (failed(handleError(wsloopIP, opInst)))
+      return failure();
+  }
+  builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+  if (isInScanRegion) {
+    SmallVector<Region *> reductionRegions;
+    llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
+                    [](omp::DeclareReductionOp reductionDecl) {
+                      return &reductionDecl.getCleanupRegion();
+                    });
+    if (failed(inlineOmpRegionCleanup(
+            reductionRegions, privateReductionVariables, moduleTranslation,
+            builder, "omp.reduction.cleanup")))
+      return failure();
+  } else {
+    // Process the reductions if required.
+    if (failed(createReductionsAndCleanup(
+            wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
+            privateReductionVariables, isByRef, wsloopOp.getNowait(),
+            /*isTeamsReduction=*/false)))
+      return failure();
+  }
   return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
                             privateVarsInfo.llvmVars,
                             privateVarsInfo.privatizers);
@@ -2553,6 +2595,60 @@ convertOrderKind(std::optional<omp::ClauseOrderKind> o) {
   llvm_unreachable("Unknown ClauseOrderKind kind");
 }
 
+static LogicalResult
+convertOmpScan(Operation &opInst, llvm::IRBuilderBase &builder,
+               LLVM::ModuleTranslation &moduleTranslation) {
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+  auto scanOp = cast<omp::ScanOp>(opInst);
+  bool isInclusive = scanOp.hasInclusiveVars();
+  SmallVector<llvm::Value *> llvmScanVars;
+  mlir::OperandRange mlirScanVars = scanOp.getInclusiveVars();
+  if (!isInclusive)
+    mlirScanVars = scanOp.getExclusiveVars();
+  for (auto val : mlirScanVars) {
+    llvm::Value *llvmVal = moduleTranslation.lookupValue(val);
+
+    llvmScanVars.push_back(llvmVal);
+  }
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createScan(
+          ompLoc, allocaIP, llvmScanVars, isInclusive);
+  if (failed(handleError(afterIP, opInst)))
+    return failure();
+
+  builder.restoreIP(*afterIP);
+
+  // TODO: The argument of the enclosing LoopNestOp is stored into the loop
+  // index variable, and that variable is then used on both sides of the scan
+  // operation. That makes the MLIR invalid per the OpenMP spec:
+  // `Intra-iteration dependences from a statement in the structured block
+  // sequence that precede a scan directive to a statement in the structured
+  // block sequence that follows a scan directive must not exist, except for
+  // dependences for the list items specified in an inclusive or exclusive
+  // clause.` The LoopNestOp argument therefore needs to be stored again
+  // after the ScanOp so that the generated MLIR is valid.
+  auto parentOp = scanOp->getParentOp();
+  auto loopOp = dyn_cast<omp::LoopNestOp>(parentOp);
+  if (loopOp) {
+    auto &firstBlock = *(scanOp->getParentRegion()->getBlocks()).begin();
+    auto &ins = *(firstBlock.begin());
+    if (isa<LLVM::StoreOp>(ins)) {
+      LLVM::StoreOp storeOp = cast<LLVM::StoreOp>(ins);
+      auto src = moduleTranslation.lookupValue(storeOp->getOperand(0));
+      if (src == moduleTranslation.lookupValue(
+                     (loopOp.getRegion().getArguments())[0])) {
+        auto dest = moduleTranslation.lookupValue(storeOp->getOperand(1));
+        builder.CreateStore(src, dest);
+      }
+    }
+  }
+  return success();
+}
+
 /// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -2626,12 +2722,15 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
     return failure();
 
   builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
-  llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
-  ompBuilder->applySimd(loopInfo, alignedVars,
-                        simdOp.getIfExpr()
-                            ? moduleTranslation.lookupValue(simdOp.getIfExpr())
-                            : nullptr,
-                        order, simdlen, safelen);
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos =
+      findCurrentLoopInfos(moduleTranslation);
+  for (llvm::CanonicalLoopInfo *loopInfo : loopInfos) {
+    ompBuilder->applySimd(
+        loopInfo, alignedVars,
+        simdOp.getIfExpr() ? moduleTranslation.lookupValue(simdOp.getIfExpr())
+                           : nullptr,
+        order, simdlen, safelen);
+  }
 
   return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
                             privateVarsInfo.llvmVars,
@@ -2698,16 +2797,53 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
                                                        ompLoc.DL);
       computeIP = loopInfos.front()->getPreheaderIP();
     }
+    if (auto wsloopOp = loopOp->getParentOfType<omp::WsloopOp>()) {
+      bool isInScanRegion =
+          wsloopOp.getReductionMod() && (wsloopOp.getReductionMod().value() ==
+                                         mlir::omp::ReductionModifier::inscan);
+      if (isInScanRegion) {
+        // TODO: Handle nesting when the scan loop is itself nested in a loop.
+        assert(loopOp.getNumLoops() == 1);
+        llvm::Expected<SmallVector<llvm::CanonicalLoopInfo *>> loopResults =
+            ompBuilder->createCanonicalScanLoops(
+                loc, bodyGen, lowerBound, upperBound, step,
+                /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP,
+                "loop");
+
+        if (failed(handleError(loopResults, *loopOp)))
+          return failure();
+        auto inputLoop = loopResults->front();
+        auto scanLoop = loopResults->back();
+        moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
+            [&](OpenMPLoopInfoStackFrame &frame) {
+              frame.loopInfos.push_back(inputLoop);
+              frame.loopInfos.push_back(scanLoop);
+              return WalkResult::interrupt();
+            });
+        builder.restoreIP(scanLoop->getAfterIP());
+        return success();
+      } else {
+        llvm::Expected<llvm::CanonicalLoopInfo *> loopResult =
+            ompBuilder->createCanonicalLoop(
+                loc, bodyGen, lowerBound, upperBound, step,
+                /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
 
-    llvm::Expected<llvm::CanonicalLoopInfo *> loopResult =
-        ompBuilder->createCanonicalLoop(
-            loc, bodyGen, lowerBound, upperBound, step,
-            /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
+        if (failed(handleError(loopResult, *loopOp)))
+          return failure();
 
-    if (failed(handleError(loopResult, *loopOp)))
-      return failure();
+        loopInfos.push_back(*loopResult);
+      }
+    } else {
+      llvm::Expected<llvm::CanonicalLoopInfo *> loopResult =
+          ompBuilder->createCanonicalLoop(
+              loc, bodyGen, lowerBound, upperBound, step,
+              /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
+
+      if (failed(handleError(loopResult, *loopOp)))
+        return failure();
 
-    loopInfos.push_back(*loopResult);
+      loopInfos.push_back(*loopResult);
+    }
   }
 
   // Collapse loops. Store the insertion point because LoopInfos may get
@@ -2719,7 +2855,8 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
   // after applying transformations.
   moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
       [&](OpenMPLoopInfoStackFrame &frame) {
-        frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
+        frame.loopInfos.push_back(
+            ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}));
         return WalkResult::interrupt();
       });
 
@@ -4329,18 +4466,20 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
       bool loopNeedsBarrier = false;
       llvm::Value *chunk = nullptr;
 
-      llvm::CanonicalLoopInfo *loopInfo =
-          findCurrentLoopInfo(moduleTranslation);
-      llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
-          ompBuilder->applyWorkshareLoop(
-              ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
-              convertToScheduleKind(schedule), chunk, isSimd,
-              scheduleMod == omp::ScheduleModifier::monotonic,
-              scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
-              workshareLoopType);
-
-      if (!wsloopIP)
-        return wsloopIP.takeError();
+      SmallVector<llvm::CanonicalLoopInfo *> loopInfos =
+          findCurrentLoopInfos(moduleTranslation);
+      for (llvm::CanonicalLoopInfo *loopInfo : loopInfos) {
+        llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+            ompBuilder->applyWorkshareLoop(
+                ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+                convertToScheduleKind(schedule), chunk, isSimd,
+                scheduleMod == omp::ScheduleModifier::monotonic,
+                scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+                workshareLoopType);
+
+        if (!wsloopIP)
+          return wsloopIP.takeError();
+      }
     }
 
     if (failed(cleanupPrivateVars(builder, moduleTranslation,
@@ -5370,6 +5509,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
           .Case([&](omp::WsloopOp) {
             return convertOmpWsloop(*op, builder, moduleTranslation);
           })
+          .Case([&](omp::ScanOp) {
+            return convertOmpScan(*op, builder, moduleTranslation);
+          })
           .Case([&](omp::SimdOp) {
             return convertOmpSimd(*op, builder, moduleTranslation);
           })
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-scan.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-scan.mlir
new file mode 100644
index 0000000000000..a88c1993aebe1
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-scan.mlir
@@ -0,0 +1,120 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.declare_reduction @add_reduction_i32 : i32 init {
+^bb0(%arg0: i32):
+  %0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%0 : i32)
+} combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %0 = llvm.add %arg0, %arg1 : i32
+  omp.yield(%0 : i32)
+}
+// CHECK-LABEL: @scan_reduction
+llvm.func @scan_reduction() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "z"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x i32 {bindc_name = "y"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1 : i64) : i64
+  %5 = llvm.alloca %4 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(1 : i64) : i64
+  %7 = llvm.alloca %6 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr
+  %8 = llvm.mlir.constant(0 : index) : i64
+  %9 = llvm.mlir.constant(1 : index) : i64
+  %10 = llvm.mlir.constant(100 : i32) : i32
+  %11 = llvm.mlir.constant(1 : i32) : i32
+  %12 = llvm.mlir.constant(0 : i32) : i32
+  %13 = llvm.mlir.constant(100 : index) : i64
+  %14 = llvm.mlir.addressof @_QFEa : !llvm.ptr
+  %15 = llvm.mlir.addressof @_QFEb : !llvm.ptr
+  omp.parallel {
+    %37 = llvm.mlir.constant(1 : i64) : i64
+    %38 = llvm.alloca %37 x i32 {bindc_name = "k", pinned} : (i64) -> !llvm.ptr
+    %39 = llvm.mlir.constant(1 : i64) : i64
+    omp.wsloop reduction(mod: inscan, @add_reduction_i32 %5 -> %arg0 : !llvm.ptr) {
+      omp.loop_nest (%arg1) : i32 = (%11) to (%10) inclusive step (%11) {
+        llvm.store %arg1, %38 : i32, !llvm.ptr
+        %40 = llvm.load %arg0 : !llvm.ptr -> i32
+        %41 = llvm.load %38 : !llvm.ptr -> i32
+        %42 = llvm.sext %41 : i32 to i64
+        %50 = llvm.getelementptr %14[%42] : (!llvm.ptr, i64) -> !llvm.ptr, i32
+        %51 = llvm.load %50 : !llvm.ptr -> i32
+        %52 = llvm.add %40, %51 : i32
+        llvm.store %52, %arg0 : i32, !llvm.ptr
+        omp.scan inclusive(%arg0 : !llvm.ptr)
+        %53 = llvm.load %arg0 : !llvm.ptr -> i32
+        %54 = llvm.load %38 : !llvm.ptr -> i32
+        %55 = llvm.sext %54 : i32 to i64
+        %63 = llvm.getelementptr %15[%55] : (!llvm.ptr, i64) -> !llvm.ptr, i32
+        llvm.store %53, %63 : i32, !llvm.ptr
+        omp.yield
+      }
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+llvm.mlir.global internal @_QFEa() {addr_space = 0 : i32} : !llvm.array<100 x i32> {
+  %0 = llvm.mlir.zero : !llvm.array<100 x i32>
+  llvm.return %0 : !llvm.array<100 x i32>
+}
+llvm.mlir.global internal @_QFEb() {addr_space = 0 : i32} : !llvm.array<100 x i32> {
+  %0 = llvm.mlir.zero : !llvm.array<100 x i32>
+  llvm.return %0 : !llvm.array<100 x i32>
+}
+llvm.mlir.global internal constant @_QFECn() {addr_space = 0 : i32} : i32 {
+  %0 = llvm.mlir.constant(100 : i32) : i32
+  llvm.return %0 : i32
+}
+//CHECK: %[[BUFF:.+]] = alloca i32, i32 100, align 4
+//CHECK: omp_loop.preheader{{.*}}:                              ; preds = %omp.wsloop.region
+//CHECK: omp_loop.after:                                   ; preds = %omp_loop.exit
+//CHECK:   %[[LOG:.+]] = call double @llvm.log2.f64(double 1.000000e+02) #0
+//CHECK:   %[[CEIL:.+]] = call double @llvm.ceil.f64(double %[[LOG]]) #0
+//CHECK:   %[[UB:.+]] = fptoui double %[[CEIL]] to i32
+//CHECK:   br label %omp.outer.log.scan.body
+//CHECK: omp.outer.log.scan.body:                          ; preds = %omp.inner.log.scan.exit, %omp_loop.after
+//CHECK:   %[[K:.+]] = phi i32 [ 0, %omp_loop.after ], [ %[[NEXTK:.+]], %omp.inner.log.scan.exit ]
+//CHECK:   %[[I:.+]] = phi i32 [ 1, %omp_loop.after ], [ %[[NEXTI:.+]], %omp.inner.log.scan.exit ]
+//CHECK:   %[[CMP1:.+]] = icmp uge i32 99, %[[I]]
+//CHECK:   br i1 %[[CMP1]], label %omp.inner.log.scan.body, label %omp.inner.log.scan.exit
+//CHECK: omp.inner.log.scan.exit:                          ; preds = %omp.inner.log.scan.body, %omp.outer.log.scan.body
+//CHECK:   %[[NEXTK]] = add nuw i32 %[[K]], 1
+//CHECK:   %[[NEXTI]] = shl nuw i32 %[[I]], 1
+//CHECK:   %[[CMP2:.+]] = icmp ne i32 %[[NEXTK]], %[[UB]]
+//CHECK:   br i1 %[[CMP2]], label %omp.outer.log.scan.body, label %omp.outer.log.scan.exit
+//CHECK: omp.outer.log.scan.exit:                          ; preds = %omp.inner.log.scan.exit
+//CHECK:   call void @__kmpc_barrier{{.*}}
+//CHECK:   br label %omp.scan.loop.cont
+//CHECK: omp.scan.loop.cont:                               ; preds = %omp.outer.log.scan.exit
+//CHECK:   br label %omp_loop.preheader{{.*}}
+//CHECK: omp_loop.after{{.*}}:                                 ; preds = %omp_loop.exit{{.*}}
+//CHECK:  %[[ARRLAST:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 100
+//CHECK:  %[[RES:.+]] = load i32, ptr %[[ARRLAST]], align 4
+//CHECK:  store i32 %[[RES]], ptr %loadgep{{.*}}, align 4
+//CHECK: omp.inscan.dispatch{{.*}}:                            ; preds = %omp_loop.body{{.*}}
+//CHECK:   store i32 0, ptr %[[REDPRIV:.+]], align 4
+//CHECK:   %[[arrayOffset1:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 %{{.*}}
+//CHECK:   %[[BUFFVAL1:.+]] = load i32, ptr %[[arrayOffset1]], align 4
+//CHECK:   store i32 %[[BUFFVAL1]], ptr %[[REDPRIV]], align 4
+//CHECK: omp.inner.log.scan.body:                          ; preds = %omp.inner.log.scan.body, %omp.outer.log.scan.body
+//CHECK:   %[[CNT:.+]] = phi i32 [ 99, %omp.outer.log.scan.body ], [ %[[CNTNXT:.+]], %omp.inner.log.scan.body ]
+//CHECK:   %[[IND1:.+]] = add i32 %[[CNT]], 1
+//CHECK:   %[[IND1PTR:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 %[[IND1]]
+//CHECK:   %[[IND2:.+]] = sub nuw i32 %[[IND1]], %[[I]]
+//CHECK:   %[[IND2PTR:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 %[[IND2]]
+//CHECK:   %[[IND1VAL:.+]] = load i32, ptr %[[IND1PTR]], align 4
+//CHECK:   %[[IND2VAL:.+]] = load i32, ptr %[[IND2PTR]], align 4
+//CHECK:   %[[REDVAL:.+]] = add i32 %[[IND1VAL]], %[[IND2VAL]]
+//CHECK:   store i32 %[[REDVAL]], ptr %[[IND1PTR]], align 4
+//CHECK:   %[[CNTNXT]] = sub nuw i32 %[[CNT]], 1
+//CHECK:   %[[CMP3:.+]] = icmp uge i32 %[[CNTNXT]], %[[I]]
+//CHECK:   br i1 %[[CMP3]], label %omp.inner.log.scan.body, label %omp.inner.log.scan.exit
+//CHECK: omp.inscan.dispatch:                              ; preds = %omp_loop.body
+//CHECK:   store i32 0, ptr %[[REDPRIV]], align 4
+//CHECK:   br i1 true, label %omp.before.scan.bb, label %omp.after.scan.bb
+//CHECK: omp.loop_nest.region:                             ; preds = %omp.before.scan.bb
+//CHECK:   %[[ARRAYOFFSET2:.+]] = getelementptr inbounds i32, ptr %[[BUFF]], i32 %{{.*}}
+//CHECK:   %[[REDPRIVVAL:.+]] = load i32, ptr %[[REDPRIV]], align 4
+//CHECK:   store i32 %[[REDPRIVVAL]], ptr %[[ARRAYOFFSET2]], align 4
+//CHECK:   br label %omp.scan.loop.exit
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 7eafe396082e4..7b8e8b509d72b 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -212,37 +212,6 @@ llvm.func @simd_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
 
 // -----
 
-omp.declare_reduction @add_f32 : f32
-init {
-^bb0(%arg: f32):
-  %0 = llvm.mlir.constant(0.0 : f32) : f32
-  omp.yield (%0 : f32)
-}
-combiner {
-^bb1(%arg0: f32, %arg1: f32):
-  %1 = llvm.fadd %arg0, %arg1 : f32
-  omp.yield (%1 : f32)
-}
-atomic {
-^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
-  %2 = llvm.load %arg3 : !llvm.ptr -> f32
-  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
-  omp.yield
-}
-llvm.func @scan_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
-  // expected-error at below {{not yet implemented: Unhandled clause reduction with modifier in omp.wsloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.wsloop}}
-  omp.wsloop reduction(mod:inscan, @add_f32 %x -> %prv : !llvm.ptr) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.scan inclusive(%prv : !llvm.ptr)
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
 llvm.func @single_allocate(%x : !llvm.ptr) {
   // expected-error at below {{not yet implemented: Unhandled clause allocate in omp.single operation}}
   // expected-error at below {{LLVM Translation failed for operation: omp.single}}

>From 3072fbc83eb52cd4a94dda102005e56d739a7736 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Mon, 7 Apr 2025 01:19:56 -0500
Subject: [PATCH 2/6] new changes

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  4 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 48 +++++++++----------
 .../Frontend/OpenMPIRBuilderTest.cpp          | 13 +++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 32 ++++++++-----
 4 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0e6bdb14e1b94..37bf654f402d8 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1602,7 +1602,8 @@ class OpenMPIRBuilder {
 
   /// Creates the buffer needed for scan reduction.
   /// \param ScanVars Scan Variables.
-  void emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars);
+  void emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars,
+                                     ArrayRef<llvm::Type *> ScanVarsType);
 
   /// Copies the result back to the reduction variable.
   /// \param ReductionInfos Array type containing the ReductionOps.
@@ -2718,6 +2719,7 @@ class OpenMPIRBuilder {
   InsertPointOrErrorTy createScan(const LocationDescription &Loc,
                                   InsertPointTy AllocaIP,
                                   ArrayRef<llvm::Value *> ScanVars,
+                                  ArrayRef<llvm::Type *> ScanVarsType,
                                   bool IsInclusive);
   /// Generator for '#omp critical'
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 2f10a52538580..8fd60606df938 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4002,10 +4002,11 @@ OpenMPIRBuilder::emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
 // inclusive scans now.
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
-    ArrayRef<llvm::Value *> ScanVars, bool IsInclusive) {
+    ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
+    bool IsInclusive) {
   if (ScanInfo.OMPFirstScanLoop) {
     Builder.restoreIP(AllocaIP);
-    emitScanBasedDirectiveDeclsIR(ScanVars);
+    emitScanBasedDirectiveDeclsIR(ScanVars, ScanVarsType);
   }
   if (!updateToLocation(Loc))
     return Loc.IP;
@@ -4014,11 +4015,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
 
   if (ScanInfo.OMPFirstScanLoop) {
     // Emit buffer[i] = red; at the end of the input phase.
-    for (Value *ScanVar : ScanVars) {
-      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVar];
-      Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+    for (int i = 0; i < ScanVars.size(); i++) {
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVars[i]];
+      Type *DestTy = ScanVarsType[i];
       Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
-      Value *Src = Builder.CreateLoad(DestTy, ScanVar);
+      Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
       Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
           Val, DestTy->getPointerTo(defaultAS));
 
@@ -4031,10 +4032,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
   // Initialize the private reduction variable to 0 in each iteration.
   // It is used to copy intial values to scan buffer.
   ConstantInt *Zero = ConstantInt::get(Builder.getInt32Ty(), 0);
-  for (Value *ScanVar : ScanVars) {
-    Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+  for (int i = 0; i < ScanVars.size(); i++) {
+    Type *DestTy = ScanVarsType[i];
     Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-        ScanVar, DestTy->getPointerTo(defaultAS));
+        ScanVars[i], DestTy->getPointerTo(defaultAS));
     Builder.CreateStore(Zero, Dest);
   }
 
@@ -4042,14 +4043,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
     IV = ScanInfo.IV;
     // Emit red = buffer[i]; at the entrance to the scan phase.
     // TODO: if exclusive scan, the red = buffer[i-1] needs to be updated.
-    for (Value *ScanVar : ScanVars) {
-      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVar];
-      Type *DestTy = Builder.getInt32Ty(); // ScanVars[i]->getType();
+    for (int i = 0; i < ScanVars.size(); i++) {
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVars[i]];
+      Type *DestTy = ScanVarsType[i];
       Value *SrcPtr =
           Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
       Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
       Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-          ScanVar, DestTy->getPointerTo(defaultAS));
+          ScanVars[i], DestTy->getPointerTo(defaultAS));
 
       Builder.CreateStore(Src, Dest);
     }
@@ -4070,21 +4071,17 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
 }
 
 void OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
-    ArrayRef<Value *> ScanVars) {
+    ArrayRef<Value *> ScanVars, ArrayRef<Type *> ScanVarsType) {
 
   Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
-  for (Value *ScanVar : ScanVars) {
-    llvm::Value *Buff =
-        Builder.CreateAlloca(Builder.getInt32Ty(), AllocSpan, "vla");
-    ScanInfo.ReductionVarToScanBuffs[ScanVar] = Buff;
+  for (int i = 0; i < ScanVars.size(); i++) {
+    llvm::Value *Buff = Builder.CreateAlloca(ScanVarsType[i], AllocSpan, "vla");
+    ScanInfo.ReductionVarToScanBuffs[ScanVars[i]] = Buff;
   }
 }
 
 void OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
     SmallVector<ReductionInfo> ReductionInfos) {
-  llvm::Value *OMPLast = Builder.CreateNSWAdd(
-      ScanInfo.Span,
-      llvm::ConstantInt::get(ScanInfo.Span->getType(), 1, /*isSigned=*/false));
   unsigned int DefaultAS = M.getDataLayout().getProgramAddressSpace();
   for (ReductionInfo RedInfo : ReductionInfos) {
     Value *PrivateVar = RedInfo.PrivateVariable;
@@ -4092,7 +4089,8 @@ void OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
     Value *Buff = ScanInfo.ReductionVarToScanBuffs[PrivateVar];
 
     Type *SrcTy = RedInfo.ElementType;
-    Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, OMPLast, "arrayOffset");
+    Value *Val =
+        Builder.CreateInBoundsGEP(SrcTy, Buff, ScanInfo.Span, "arrayOffset");
     Value *Src = Builder.CreateLoad(SrcTy, Val);
     Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
         OrigVar, SrcTy->getPointerTo(DefaultAS));
@@ -4120,7 +4118,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
       (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
   llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
   ConstantInt *One = ConstantInt::get(Builder.getInt32Ty(), 1);
-  llvm::Value *span = Builder.CreateAdd(spanDiff, One);
+  llvm::Value *span = ScanInfo.Span;
   llvm::Value *Arg = Builder.CreateUIToFP(span, Builder.getDoubleTy());
   llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
   F = llvm::Intrinsic::getOrInsertDeclaration(
@@ -5456,7 +5454,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
   // TODO: It would be sufficient to only sink them into body of the
   // corresponding tile loop.
   SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
-  for (int i = 0; i < NumLoops - 1; ++i) {
+  for (size_t i = 0; i < NumLoops - 1; ++i) {
     CanonicalLoopInfo *Surrounding = Loops[i];
     CanonicalLoopInfo *Nested = Loops[i + 1];
 
@@ -5469,7 +5467,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
   Builder.SetCurrentDebugLocation(DL);
   Builder.restoreIP(OutermostLoop->getPreheaderIP());
   SmallVector<Value *, 4> FloorCount, FloorRems;
-  for (int i = 0; i < NumLoops; ++i) {
+  for (size_t i = 0; i < NumLoops; ++i) {
     Value *TileSize = TileSizes[i];
     Value *OrigTripCount = OrigTripCounts[i];
     Type *IVType = OrigTripCount->getType();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 251042e030f0d..e54d9906474e7 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1440,12 +1440,14 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
 
   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
 }
-void createScan(llvm::Value *scanVar, OpenMPIRBuilder &OMPBuilder,
-                IRBuilder<> &Builder, OpenMPIRBuilder::LocationDescription Loc,
+void createScan(llvm::Value *scanVar, llvm::Type *scanType,
+                OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
+                OpenMPIRBuilder::LocationDescription Loc,
                 OpenMPIRBuilder::InsertPointTy &allocaIP) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
-  ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
-                       OMPBuilder.createScan(Loc, allocaIP, {scanVar}, true));
+  ASSERT_EXPECTED_INIT(
+      InsertPointTy, retIp,
+      OMPBuilder.createScan(Loc, allocaIP, {scanVar}, {scanType}, true));
   Builder.restoreIP(retIp);
 }
 
@@ -5363,7 +5365,8 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
     NumBodiesGenerated += 1;
     Builder.restoreIP(CodeGenIP);
-    createScan(scanVar, OMPBuilder, Builder, Loc, allocaIP);
+    createScan(scanVar, Builder.getFloatTy(), OMPBuilder, Builder, Loc,
+               allocaIP);
     return Error::success();
   };
   SmallVector<CanonicalLoopInfo *> Loops;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index bfe6faa5f1bd4..f093c986bb6f2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -47,6 +47,7 @@
 
 using namespace mlir;
 
+llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType;
 namespace {
 static llvm::omp::ScheduleKind
 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
@@ -1140,6 +1141,11 @@ initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
   // variables. Although this could be done after allocas, we don't want to mess
   // up with the alloca insertion point.
   for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+
+    llvm::Type *reductionType =
+        moduleTranslation.convertType(reductionDecls[i].getType());
+    ReductionVarToType[privateReductionVariables[i]] = reductionType;
+
     SmallVector<llvm::Value *, 1> phis;
 
     // map block argument to initializer region
@@ -1213,9 +1219,11 @@ static void collectReductionInfo(
       atomicGen = owningAtomicReductionGens[i];
     llvm::Value *variable =
         moduleTranslation.lookupValue(loop.getReductionVars()[i]);
+    llvm::Type *reductionType =
+        moduleTranslation.convertType(reductionDecls[i].getType());
+    ReductionVarToType[privateReductionVariables[i]] = reductionType;
     reductionInfos.push_back(
-        {moduleTranslation.convertType(reductionDecls[i].getType()), variable,
-         privateReductionVariables[i],
+        {reductionType, variable, privateReductionVariables[i],
          /*EvaluationKind=*/llvm::OpenMPIRBuilder::EvalKind::Scalar,
          owningReductionGens[i],
          /*ReductionGenClang=*/nullptr, atomicGen});
@@ -2603,34 +2611,34 @@ convertOmpScan(Operation &opInst, llvm::IRBuilderBase &builder,
   auto scanOp = cast<omp::ScanOp>(opInst);
   bool isInclusive = scanOp.hasInclusiveVars();
   SmallVector<llvm::Value *> llvmScanVars;
+  SmallVector<llvm::Type *> llvmScanVarsType;
   mlir::OperandRange mlirScanVars = scanOp.getInclusiveVars();
   if (!isInclusive)
     mlirScanVars = scanOp.getExclusiveVars();
   for (auto val : mlirScanVars) {
     llvm::Value *llvmVal = moduleTranslation.lookupValue(val);
-
     llvmScanVars.push_back(llvmVal);
+    llvmScanVarsType.push_back(ReductionVarToType[llvmVal]);
   }
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createScan(
-          ompLoc, allocaIP, llvmScanVars, isInclusive);
+          ompLoc, allocaIP, llvmScanVars, llvmScanVarsType, isInclusive);
   if (failed(handleError(afterIP, opInst)))
     return failure();
 
   builder.restoreIP(*afterIP);
 
   // TODO: The argument of LoopnestOp is stored into the index variable and this
-  // variable is used
-  //  across scan operation. However that makes the mlir
-  //  invalid.(`Intra-iteration dependences from a statement in the structured
-  //  block sequence that precede a scan directive to a statement in the
-  //  structured block sequence that follows a scan directive must not exist,
-  //  except for dependences for the list items specified in an inclusive or
-  //  exclusive clause.`). The argument of LoopNestOp need to be loaded again
-  //  after ScanOp again so mlir generated is valid.
+  // variable is used across scan operation. However that makes the mlir
+  // invalid.(`Intra-iteration dependences from a statement in the structured
+  // block sequence that precede a scan directive to a statement in the
+  // structured block sequence that follows a scan directive must not exist,
+  // except for dependences for the list items specified in an inclusive or
+  // exclusive clause.`). The argument of LoopNestOp need to be loaded again
+  // after ScanOp again so mlir generated is valid.
   auto parentOp = scanOp->getParentOp();
   auto loopOp = cast<omp::LoopNestOp>(parentOp);
   if (loopOp) {

>From 20b2b94c1e67467b793e7131cb7fb170f5e94336 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Thu, 10 Apr 2025 01:15:42 -0500
Subject: [PATCH 3/6] Adding a few corrections

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |   2 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 205 +++++----
 .../Frontend/OpenMPIRBuilderTest.cpp          |  25 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  12 +-
 offload/out                                   | 424 ++++++++++++++++++
 5 files changed, 561 insertions(+), 107 deletions(-)
 create mode 100644 offload/out

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 37bf654f402d8..dc5721125cccd 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2715,7 +2715,7 @@ class OpenMPIRBuilder {
   /// \param ScanVars Scan Variables.
   /// \param IsInclusive Whether it is an inclusive or exclusive scan.
   ///
-  /// \returns The insertion position *after* the masked.
+  /// \returns The insertion position *after* the scan.
   InsertPointOrErrorTy createScan(const LocationDescription &Loc,
                                   InsertPointTy AllocaIP,
                                   ArrayRef<llvm::Value *> ScanVars,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8fd60606df938..394bf28bc2574 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4103,94 +4103,111 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
     const LocationDescription &Loc, InsertPointTy &FinalizeIP,
     SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
 
-  llvm::Value *spanDiff = ScanInfo.Span;
-
   if (!updateToLocation(Loc))
     return Loc.IP;
-  auto curFn = Builder.GetInsertBlock()->getParent();
-  // for (int k = 0; k <= ceil(log2(n)); ++k)
-  llvm::BasicBlock *LoopBB =
-      BasicBlock::Create(curFn->getContext(), "omp.outer.log.scan.body");
-  llvm::BasicBlock *ExitBB =
-      BasicBlock::Create(curFn->getContext(), "omp.outer.log.scan.exit");
-  llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration(
-      Builder.GetInsertBlock()->getModule(),
-      (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
-  llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
-  ConstantInt *One = ConstantInt::get(Builder.getInt32Ty(), 1);
-  llvm::Value *span = ScanInfo.Span; // Builder.CreateAdd(spanDiff, One);
-  llvm::Value *Arg = Builder.CreateUIToFP(span, Builder.getDoubleTy());
-  llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
-  F = llvm::Intrinsic::getOrInsertDeclaration(
-      Builder.GetInsertBlock()->getModule(),
-      (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
-  LogVal = emitNoUnwindRuntimeCall(F, LogVal, "");
-  LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
-  llvm::Value *NMin1 =
-      Builder.CreateNUWSub(span, llvm::ConstantInt::get(span->getType(), 1));
-  Builder.SetInsertPoint(InputBB);
-  Builder.CreateBr(LoopBB);
-  emitBlock(LoopBB, Builder.GetInsertBlock()->getParent());
-  Builder.SetInsertPoint(LoopBB);
-
-  PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
-  //// size pow2k = 1;
-  PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
-  Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
+  auto BodyGenCB = [&](InsertPointTy AllocaIP,
+                       InsertPointTy CodeGenIP) -> Error {
+    Builder.restoreIP(CodeGenIP);
+    auto *CurFn = Builder.GetInsertBlock()->getParent();
+    // for (int k = 0; k <= ceil(log2(n)); ++k)
+    llvm::BasicBlock *LoopBB =
+        BasicBlock::Create(CurFn->getContext(), "omp.outer.log.scan.body");
+    llvm::BasicBlock *ExitBB =
+        splitBB(Builder, false, "omp.outer.log.scan.exit");
+    llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration(
+        Builder.GetInsertBlock()->getModule(),
+        (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
+    llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
+    llvm::Value *Arg =
+        Builder.CreateUIToFP(ScanInfo.Span, Builder.getDoubleTy());
+    llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
+    F = llvm::Intrinsic::getOrInsertDeclaration(
+        Builder.GetInsertBlock()->getModule(),
+        (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
+    LogVal = emitNoUnwindRuntimeCall(F, LogVal, "");
+    LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
+    llvm::Value *NMin1 = Builder.CreateNUWSub(
+        ScanInfo.Span, llvm::ConstantInt::get(ScanInfo.Span->getType(), 1));
+    Builder.SetInsertPoint(InputBB);
+    Builder.CreateBr(LoopBB);
+    emitBlock(LoopBB, Builder.GetInsertBlock()->getParent());
+    Builder.SetInsertPoint(LoopBB);
+
+    PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+    //// size pow2k = 1;
+    PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+    Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
+                         InputBB);
+    Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
                        InputBB);
-  Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1), InputBB);
-  //// for (size i = n - 1; i >= 2 ^ k; --i)
-  ////   tmp[i] op= tmp[i-pow2k];
-  llvm::BasicBlock *InnerLoopBB =
-      BasicBlock::Create(curFn->getContext(), "omp.inner.log.scan.body");
-  llvm::BasicBlock *InnerExitBB =
-      BasicBlock::Create(curFn->getContext(), "omp.inner.log.scan.exit");
-  llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
-  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
-  emitBlock(InnerLoopBB, Builder.GetInsertBlock()->getParent());
-  Builder.SetInsertPoint(InnerLoopBB);
-  auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
-  IVal->addIncoming(NMin1, LoopBB);
-  unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
-  for (ReductionInfo RedInfo : ReductionInfos) {
-    Value *ReductionVal = RedInfo.PrivateVariable;
-    Value *Buff = ScanInfo.ReductionVarToScanBuffs[ReductionVal];
-    Type *DestTy = RedInfo.ElementType;
-    Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
-    Value *LHSPtr = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
-    Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K);
-    Value *RHSPtr =
-        Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
-    Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
-    Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
-    Value *LHSAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
-        LHSPtr, RHS->getType()->getPointerTo(defaultAS));
-    llvm::Value *Result;
-    InsertPointOrErrorTy AfterIP =
-        RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
-    if (!AfterIP)
-      return AfterIP.takeError();
-    Builder.CreateStore(Result, LHSAddr);
-  }
-  llvm::Value *NextIVal = Builder.CreateNUWSub(
-      IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
-  IVal->addIncoming(NextIVal, Builder.GetInsertBlock());
-  CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
-  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
-  emitBlock(InnerExitBB, Builder.GetInsertBlock()->getParent());
-  llvm::Value *Next = Builder.CreateNUWAdd(
-      Counter, llvm::ConstantInt::get(Counter->getType(), 1));
-  Counter->addIncoming(Next, Builder.GetInsertBlock());
-  // pow2k <<= 1;
-  llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
-  Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock());
-  llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal);
-  Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
-  emitBlock(ExitBB, Builder.GetInsertBlock()->getParent());
-  Builder.SetInsertPoint(ExitBB);
+    //// for (size i = n - 1; i >= 2 ^ k; --i)
+    ////   tmp[i] op= tmp[i-pow2k];
+    llvm::BasicBlock *InnerLoopBB =
+        BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.body");
+    llvm::BasicBlock *InnerExitBB =
+        BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.exit");
+    llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
+    Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+    emitBlock(InnerLoopBB, Builder.GetInsertBlock()->getParent());
+    Builder.SetInsertPoint(InnerLoopBB);
+    auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+    IVal->addIncoming(NMin1, LoopBB);
+    unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
+    for (ReductionInfo RedInfo : ReductionInfos) {
+      Value *ReductionVal = RedInfo.PrivateVariable;
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ReductionVal];
+      Type *DestTy = RedInfo.ElementType;
+      Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
+      Value *LHSPtr =
+          Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+      Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K);
+      Value *RHSPtr =
+          Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
+      Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
+      Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
+      Value *LHSAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          LHSPtr, PointerType::get(Builder.getContext(), defaultAS));
+      llvm::Value *Result;
+      InsertPointOrErrorTy AfterIP =
+          RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
+      if (!AfterIP)
+        return AfterIP.takeError();
+      Builder.CreateStore(Result, LHSAddr);
+    }
+    llvm::Value *NextIVal = Builder.CreateNUWSub(
+        IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
+    IVal->addIncoming(NextIVal, Builder.GetInsertBlock());
+    CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
+    Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+    emitBlock(InnerExitBB, Builder.GetInsertBlock()->getParent());
+    llvm::Value *Next = Builder.CreateNUWAdd(
+        Counter, llvm::ConstantInt::get(Counter->getType(), 1));
+    Counter->addIncoming(Next, Builder.GetInsertBlock());
+    // pow2k <<= 1;
+    llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+    Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock());
+    llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal);
+    Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+    Builder.SetInsertPoint(ExitBB->getFirstInsertionPt());
+    return Error::success();
+  };
+
+  // TODO: Perform finalization actions for variables. This has to be
+  // called for variables which have destructors/finalizers.
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+  llvm::Value *FilterVal = Builder.getInt32(0);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
-      createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
+  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
 
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
   Builder.restoreIP(FinalizeIP);
   emitScanBasedDirectiveFinalsIR(ReductionInfos);
   FinalizeIP = Builder.saveIP();
@@ -4204,7 +4221,6 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
 
   {
     // Emit loop with input phase:
-    // #pragma omp ...
     // for (i: 0..<num_iters>) {
     //   <input phase>;
     //   buffer[i] = red;
@@ -4215,6 +4231,11 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
       return Result;
   }
   {
+    // Emit loop with scan phase:
+    // for (i: 0..<num_iters>) {
+    //   red = buffer[i];
+    //   <scan phase>;
+    // }
     ScanInfo.OMPFirstScanLoop = false;
     auto Result = ScanLoopGen(Builder.saveIP());
     if (Result)
@@ -4224,17 +4245,17 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
 }
 
 void OpenMPIRBuilder::createScanBBs() {
-  auto fun = Builder.GetInsertBlock()->getParent();
+  Function *Fun = Builder.GetInsertBlock()->getParent();
   ScanInfo.OMPScanExitBlock =
-      BasicBlock::Create(fun->getContext(), "omp.exit.inscan.bb");
+      BasicBlock::Create(Fun->getContext(), "omp.exit.inscan.bb");
   ScanInfo.OMPScanDispatch =
-      BasicBlock::Create(fun->getContext(), "omp.inscan.dispatch");
+      BasicBlock::Create(Fun->getContext(), "omp.inscan.dispatch");
   ScanInfo.OMPAfterScanBlock =
-      BasicBlock::Create(fun->getContext(), "omp.after.scan.bb");
+      BasicBlock::Create(Fun->getContext(), "omp.after.scan.bb");
   ScanInfo.OMPBeforeScanBlock =
-      BasicBlock::Create(fun->getContext(), "omp.before.scan.bb");
+      BasicBlock::Create(Fun->getContext(), "omp.before.scan.bb");
   ScanInfo.OMPScanLoopExit =
-      BasicBlock::Create(fun->getContext(), "omp.scan.loop.exit");
+      BasicBlock::Create(Fun->getContext(), "omp.scan.loop.exit");
 }
 
 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
@@ -5454,7 +5475,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
   // TODO: It would be sufficient to only sink them into body of the
   // corresponding tile loop.
   SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
-  for (size_t i = 0; i < NumLoops - 1; ++i) {
+  for (size_t i = 0; i < NumLoops - 1; ++i) {
     CanonicalLoopInfo *Surrounding = Loops[i];
     CanonicalLoopInfo *Nested = Loops[i + 1];
 
@@ -5467,7 +5488,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
   Builder.SetCurrentDebugLocation(DL);
   Builder.restoreIP(OutermostLoop->getPreheaderIP());
   SmallVector<Value *, 4> FloorCount, FloorRems;
-  for (size_t i = 0; i < NumLoops; ++i) {
+  for (size_t i = 0; i < NumLoops; ++i) {
     Value *TileSize = TileSizes[i];
     Value *OrigTripCount = OrigTripCounts[i];
     Type *IVType = OrigTripCount->getType();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index e54d9906474e7..bb6c2ec5ede77 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5376,29 +5376,32 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
                            false, Builder.saveIP(), "scan"));
   Loops = loopsVec;
   EXPECT_EQ(Loops.size(), 2U);
-  auto inputLoop = Loops.front();
-  auto scanLoop = Loops.back();
-  Builder.restoreIP(scanLoop->getAfterIP());
-  inputLoop->assertOK();
-  scanLoop->assertOK();
-
-  //// Verify control flow structure (in addition to Loop->assertOK()).
-  EXPECT_EQ(inputLoop->getPreheader()->getSinglePredecessor(),
+  CanonicalLoopInfo *InputLoop = Loops.front();
+  CanonicalLoopInfo *ScanLoop = Loops.back();
+  Builder.restoreIP(ScanLoop->getAfterIP());
+  InputLoop->assertOK();
+  ScanLoop->assertOK();
+
+  EXPECT_EQ(InputLoop->getPreheader()->getSinglePredecessor(),
             &F->getEntryBlock());
-  EXPECT_EQ(scanLoop->getAfter(), Builder.GetInsertBlock());
+  EXPECT_EQ(ScanLoop->getAfter(), Builder.GetInsertBlock());
   EXPECT_EQ(NumBodiesGenerated, 2U);
   SmallVector<OpenMPIRBuilder::ReductionInfo> reductionInfos = {
       {Builder.getFloatTy(), origVar, scanVar,
        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
        /*ReductionGenClang=*/nullptr, sumAtomicReduction}};
-  auto FinalizeIP = scanLoop->getAfterIP();
-  OpenMPIRBuilder::LocationDescription RedLoc({inputLoop->getAfterIP(), DL});
+  auto FinalizeIP = ScanLoop->getAfterIP();
+  OpenMPIRBuilder::LocationDescription RedLoc({InputLoop->getAfterIP(), DL});
   llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
   ASSERT_EXPECTED_INIT(
       InsertPointTy, retIp,
       OMPBuilder.emitScanReduction(RedLoc, FinalizeIP, reductionInfos));
   Builder.restoreIP(retIp);
   Builder.CreateBr(Cont);
+  SmallVector<CallInst *> MaskedCalls;
+  findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_masked, OMPBuilder,
+            MaskedCalls);
+  ASSERT_EQ(MaskedCalls.size(), 1u);
 }
 
 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f093c986bb6f2..c68cae77b2f3d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -47,7 +47,8 @@
 
 using namespace mlir;
 
-llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType;
+llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType;
+llvm::OpenMPIRBuilder::InsertPointTy parallelAllocaIP; // TODO: Point this alloca IP at the original variable's alloca IP; ReductionDecl needs to be linked to the scan var.
 namespace {
 static llvm::omp::ScheduleKind
 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
@@ -2578,6 +2579,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
 
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
+  parallelAllocaIP = allocaIP;
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
 
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
@@ -2619,9 +2621,13 @@ convertOmpScan(Operation &opInst, llvm::IRBuilderBase &builder,
     llvm::Value *llvmVal = moduleTranslation.lookupValue(val);
     llvmScanVars.push_back(llvmVal);
     llvmScanVarsType.push_back(ReductionVarToType[llvmVal]);
+
   }
-  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
-      findAllocaInsertPoint(builder, moduleTranslation);
+  auto parallelOp = scanOp->getParentOfType<omp::ParallelOp>();
+  if (!parallelOp)
+    return scanOp.emitOpError(
+        "expected an enclosing omp.parallel operation for scan lowering");
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP = parallelAllocaIP;
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createScan(
diff --git a/offload/out b/offload/out
new file mode 100644
index 0000000000000..5df1a3ad98775
--- /dev/null
+++ b/offload/out
@@ -0,0 +1,424 @@
+grep: ./out: input file is also the output
+./test/offloading/fortran/target-map-nested-dtype-alloca-array.f90:!$omp target map(tofrom: one_l%nest%array_k)
+./test/offloading/fortran/target-map-nested-dtype-alloca-array.f90:!$omp end target
+./test/offloading/fortran/target-map-derived-type-full-1.f90:  !$omp target map(from:out) map(to:in)
+./test/offloading/fortran/target-map-derived-type-full-1.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: arg_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: arg_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: arg_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: local_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: local_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: local_alloc)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: map_ptr)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: map_ptr)
+./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: map_ptr)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target data map(ompx_hold, tofrom: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target exit data map(delete: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target map(present, tofrom: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target data
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target data map(tofrom: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target exit data map(delete: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target map(present, tofrom: presence_check)
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target
+./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target data
+./test/offloading/fortran/target-map-nullary-pointer.f90:!$omp target map(Set)
+./test/offloading/fortran/target-map-nullary-pointer.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(5))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%elements(3))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(5)%elements(3:5))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(3:5))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%num_chars%number%float_elements(8))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%num_chars%number%float_elements(5:10))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%internal_dtypes(3)%float_elements(4))
+./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
+./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-1.f90:  !$omp target map(tofrom:scalar_arr1%break_1)
+./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-1.f90:  !$omp end target
+./test/offloading/fortran/target-map-nested-alloca-dtype-3d-alloca-array-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-nested-alloca-dtype-3d-alloca-array-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-member.f90:  !$omp target map(tofrom:scalar_arr%array_x, scalar_arr%array_y)
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-derived-type-full-2.f90:  !$omp target map(from:out) map(to:in)
+./test/offloading/fortran/target-map-derived-type-full-2.f90:  !$omp end target
+./test/offloading/fortran/target-map-double-nested-dtype-single-member.f90:!$omp target map(tofrom: top_dtype%nested%array_i2, top_dtype2%nested%array_j2)
+./test/offloading/fortran/target-map-double-nested-dtype-single-member.f90:!$omp end target
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: arg_alloc)
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: local_alloc)
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: map_ptr)
+./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
+./test/offloading/fortran/basic-target-parallel-region.f90:   !$omp target parallel map(from: x)
+./test/offloading/fortran/basic-target-parallel-region.f90:   !$omp end target parallel
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:   !$omp target enter data map(alloc: A)
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp end target
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target exit data map(from: A)
+./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target exit data map(delete: A)
+./test/offloading/fortran/target-use-dev-ptr.f90:   !$omp target data map(tofrom: x) use_device_ptr(x)
+./test/offloading/fortran/target-use-dev-ptr.f90:   !$omp end target data
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target enter data map(to:my_instance, my_instance%values)
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp end target
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target exit data map(from:my_instance%values)
+./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target exit data map(release:my_instance)
+./test/offloading/fortran/target_map_present_fail.f90:!$omp target data map(present,alloc:arr)
+./test/offloading/fortran/target_map_present_fail.f90:!$omp target
+./test/offloading/fortran/target_map_present_fail.f90:!$omp end target
+./test/offloading/fortran/target_map_present_fail.f90:!$omp end target data
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp declare target link(arr1) enter(arr2)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp declare target link(scalar)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(tofrom:arr1, i, j)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(i, j)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(i, j)
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target
+./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
+./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp target map(tofrom: array_dtype(5))
+./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp end target
+./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp target map(tofrom: array_dtype(5))
+./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-alloca-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
+./test/offloading/fortran/target-map-dtype-alloca-array-of-dtype.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-alloca-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
+./test/offloading/fortran/target-map-alloca-dtype-alloca-array-of-dtype.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-alloca-and-non-alloca-array.f90:    !$omp target map(tofrom: one_l%nest%array_i, one_l%nest%array_k)
+./test/offloading/fortran/target-map-nested-dtype-alloca-and-non-alloca-array.f90:    !$omp end target
+./test/offloading/fortran/target-map-first-common-block-member.f90:  !$omp target map(tofrom: var1)
+./test/offloading/fortran/target-map-first-common-block-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp target map(tofrom: var1)
+./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp end target
+./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp target map(tofrom: var1)
+./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp end target
+./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-2.f90:  !$omp target map(tofrom:scalar_arr2%array_x(3:6))
+./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-2.f90:  !$omp end target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target enter data map(alloc:sbuf31)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp end target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target update from(sbuf31)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target exit data map(delete:sbuf31)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target enter data map(to:p)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp end target
+./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target update from(p)
+./test/offloading/fortran/local-descriptor-map-regress.f90:!!$omp target exit data map(delete:p)
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target enter data map(to: x)
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target data use_device_addr(x)
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp end target data
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target map(to: x) map(from: res1, res2) &
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp & has_device_addr(first_scalar_device_addr)
+./test/offloading/fortran/target-has-device-addr1.f90:    !$omp end target
+./test/offloading/fortran/target-region-implicit-array.f90:  !$omp target
+./test/offloading/fortran/target-region-implicit-array.f90:  !$omp end target
+./test/offloading/fortran/target-map-local-intrinisc-sized-param.f90:!$omp target map(tofrom: b)
+./test/offloading/fortran/target-map-local-intrinisc-sized-param.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array-v2.f90:!$omp target map(tofrom: alloca_dtype%array_j, alloca_dtype)
+./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array-v2.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-3d-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%array_j(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-dtype-3d-alloca-array-with-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-explicit-individual-array-member.f90:  !$omp target map(tofrom:scalar_arr%array_y)
+./test/offloading/fortran/target-map-dtype-explicit-individual-array-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-pointer-target-array-section-3d-bounds.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-pointer-target-array-section-3d-bounds.f90:!$omp end target
+./test/offloading/fortran/dump_map_tables.f90:!$omp target enter data map(to:A(:N))
+./test/offloading/fortran/dump_map_tables.f90:!$omp target parallel do
+./test/offloading/fortran/dump_map_tables.f90:!$omp target exit data map(from:A)
+./test/offloading/fortran/basic-target-region-3D-array-section.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k, j2, k2)
+./test/offloading/fortran/basic-target-region-3D-array-section.f90:!$omp end target
+./test/offloading/fortran/target-map-enter-exit-array.f90:   !$omp target enter data map(alloc: A)
+./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target
+./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target exit data map(from: A)
+./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target exit data map(delete: A)
+./test/offloading/fortran/target-map-dtype-alloca-array-and-non-alloca-dtype.f90:!$omp target map(tofrom: one_l%nest, one_l%array_j)
+./test/offloading/fortran/target-map-dtype-alloca-array-and-non-alloca-dtype.f90:!$omp end target
+./test/offloading/fortran/target-map-two-dtype-explicit-member.f90:  !$omp target map(tofrom:scalar_arr1%break_1, scalar_arr2%break_3)
+./test/offloading/fortran/target-map-two-dtype-explicit-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-two-dtype-multi-member-array-1D-bounds.f90:  !$omp target map(tofrom:scalar_arr1%array_x(3:6), scalar_arr1%array_y(3:6), scalar_arr2%array_x(3:6), scalar_arr2%array_y(3:6))
+./test/offloading/fortran/target-map-two-dtype-multi-member-array-1D-bounds.f90:  !$omp end target
+./test/offloading/fortran/target-map-all-common-block-members.f90:  !$omp target map(tofrom: var1, var2, var3)
+./test/offloading/fortran/target-map-all-common-block-members.f90:  !$omp end target
+./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp target map(tofrom: var2, var1, var3)
+./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp end target
+./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp target map(tofrom: var2, var3, var1)
+./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-array-and-scalar.f90:!$omp target map(tofrom: alloca_dtype%nested_dtype%array_i, alloca_dtype%k)
+./test/offloading/fortran/target-map-alloca-dtype-array-and-scalar.f90:!$omp end target
+./test/offloading/fortran/target-map-second-common-block-member.f90:  !$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-second-common-block-member.f90:  !$omp end target
+./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp end target
+./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp end target
+./test/offloading/fortran/double-target-call-with-declare-target.f90:    !$omp declare target link(sp)
+./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp target map(tofrom:sp) map(to: i, j)
+./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp end target
+./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp target map(tofrom:sp) map(to: i, j)
+./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-3D-member-bounds.f90:  !$omp target map(tofrom:scalar_arr%array_x(1:3, 1:3, 2:2), scalar_arr%array_y(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-3D-member-bounds.f90:  !$omp end target
+./test/offloading/fortran/target-map-dtype-allocatable-scalar-and-array.f90:    !$omp target map(tofrom: one_l%array_j, one_l%j)
+./test/offloading/fortran/target-map-dtype-allocatable-scalar-and-array.f90:    !$omp end target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:    !$omp target enter data map(to: scalar_arr%array(3:6))
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:   !$omp target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:  !$omp end target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:  !$omp target exit data map(from: scalar_arr%array(3:6))
+./test/offloading/fortran/basic-target-region-3D-array.f90:!$omp target map(tofrom:x, counter) map(to: i, j, k, i2, j2, k2)
+./test/offloading/fortran/basic-target-region-3D-array.f90:!$omp end target
+./test/offloading/fortran/basic_target_region.f90:!$omp target map(from:x)
+./test/offloading/fortran/basic_target_region.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-multi-member.f90:!$omp target map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%array_i2)
+./test/offloading/fortran/target-map-nested-dtype-multi-member.f90:!$omp end target
+./test/offloading/fortran/constant-arr-index.f90:  !$omp target map(tofrom:sp)
+./test/offloading/fortran/constant-arr-index.f90:  !$omp end target
+./test/offloading/fortran/usm_map_close.f90:  !$omp requires unified_shared_memory
+./test/offloading/fortran/usm_map_close.f90:!$omp target data map(tofrom: a, device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp target map(tofrom: device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp end target
+./test/offloading/fortran/usm_map_close.f90:!$omp end target data
+./test/offloading/fortran/usm_map_close.f90:!$omp target data map(close, tofrom: a) map(tofrom: device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp target map(tofrom: device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp end target
+./test/offloading/fortran/usm_map_close.f90:!$omp end target data
+./test/offloading/fortran/usm_map_close.f90:!$omp target data map(tofrom: a) use_device_ptr(a)
+./test/offloading/fortran/usm_map_close.f90:!$omp end target data
+./test/offloading/fortran/usm_map_close.f90:!$omp target enter data map(close, to: a)
+./test/offloading/fortran/usm_map_close.f90:!$omp target map(from: device_alloc)
+./test/offloading/fortran/usm_map_close.f90:!$omp end target
+./test/offloading/fortran/usm_map_close.f90:!$omp target exit data map(from: a)
+./test/offloading/fortran/target-map-two-dtype-individual-member-array-1D-bounds.f90:  !$omp target map(tofrom:scalar_arr1%array_x(3:6), scalar_arr2%array_x(3:6))
+./test/offloading/fortran/target-map-two-dtype-individual-member-array-1D-bounds.f90:  !$omp end target
+./test/offloading/fortran/target_map_present_success.f90:!$omp target data map(tofrom:arr)
+./test/offloading/fortran/target_map_present_success.f90:!$omp target data map(present,alloc:arr)
+./test/offloading/fortran/target_map_present_success.f90:!$omp target
+./test/offloading/fortran/target_map_present_success.f90:!$omp end target
+./test/offloading/fortran/target_map_present_success.f90:!$omp end target data
+./test/offloading/fortran/target_map_present_success.f90:!$omp end target data
+./test/offloading/fortran/target-map-literal-write.f90:!$omp target
+./test/offloading/fortran/target-map-literal-write.f90:!$omp end target
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp target map(tofrom: top_dtype%array_i, top_dtype%nested%nest2%array_j, top_dtype%nested%nest%array_ptr) &
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp map(tofrom: top_dtype2%array_i, top_dtype2%nested%nest2%array_j, top_dtype2%nested%nest%array_ptr)
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp end target
+./test/offloading/fortran/target-map-double-nested-dtype-double-array-bounds.f90:!$omp target map(tofrom: top_dtype%nested%array_i2(4:8), top_dtype2%nested%array_j2(4:8))
+./test/offloading/fortran/target-map-double-nested-dtype-double-array-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp target map(tofrom: top_dtype%nested%nest%j4, top_dtype%nested%nest%i4, top_dtype%nested%nest%k4) &
+./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp map(tofrom: top_dtype%array_i, top_dtype%nested%nest2%i3, top_dtype%nested%i2, top_dtype%nested%nest2%k3, top_dtype%nested%nest2%j3)
+./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp end target
+./test/offloading/fortran/target_map_common_block2.f90:  !$omp target map(tofrom:var4)
+./test/offloading/fortran/target_map_common_block2.f90:  !$omp end target
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp target data map(to: A, B) map(alloc: C)
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp target map(from: C)
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp end target
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp target update from(C) ! updates C device -> host
+./test/offloading/fortran/target-nested-target-data.f90:   !$omp end target data
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-member-bounds.f90:  !$omp target map(tofrom:scalar_arr%array_x(3:6), scalar_arr%array_y(3:6))
+./test/offloading/fortran/target-map-dtype-multi-explicit-array-member-bounds.f90:  !$omp end target
+./test/offloading/fortran/target-map-allocatable-array-section-1d-bounds.f90:    !$omp target map(tofrom:sp_read(2:6)) map(tofrom:sp_write(2:6))
+./test/offloading/fortran/target-map-allocatable-array-section-1d-bounds.f90:    !$omp end target
+./test/offloading/fortran/target-map-dtype-allocatable-array.f90:    !$omp target map(tofrom: one_l%array_j)
+./test/offloading/fortran/target-map-dtype-allocatable-array.f90:    !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:    !$omp target enter data map(to: array(3:6))
+./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:   !$omp target
+./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:  !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:  !$omp target exit data map(from: array(3:6))
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target data map(tofrom: b(1:3)) use_device_addr(b)
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target has_device_addr(b(2)%x)
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target data
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target data map(tofrom: b(1:3)) use_device_addr(b)
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target has_device_addr(b(2)%x)
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target
+./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target data
+./test/offloading/fortran/target-depend.f90:      !$omp parallel num_threads(3)
+./test/offloading/fortran/target-depend.f90:      !$omp single
+./test/offloading/fortran/target-depend.f90:      !$omp task depend(out: z) shared(z)
+./test/offloading/fortran/target-depend.f90:      !$omp end task
+./test/offloading/fortran/target-depend.f90:      !$omp target map(tofrom: z) depend(in:z)
+./test/offloading/fortran/target-depend.f90:      !$omp end target
+./test/offloading/fortran/target-depend.f90:      !$omp end single
+./test/offloading/fortran/target-depend.f90:      !$omp end parallel
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target enter data map(to: scalar_arr%array(3:6))
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target update to(scalar_arr%array(3:6))
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:   !$omp target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp end target
+./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target exit data map(from: scalar_arr%array(3:6))
+./test/offloading/fortran/target-map-common-block.f90:  !$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-common-block.f90:  !$omp end target
+./test/offloading/fortran/target-map-common-block.f90:!$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-common-block.f90:!$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-dtype-alloca-and-non-alloca-array.f90:!$omp target map(tofrom: one_l%array_i, one_l%array_j)
+./test/offloading/fortran/target-map-dtype-alloca-and-non-alloca-array.f90:!$omp end target
+./test/offloading/fortran/target-data-map-if-present.f90:       !$omp target data if(present(a)) map(alloc:a)
+./test/offloading/fortran/target-data-map-if-present.f90:       !$omp end target data
+./test/offloading/fortran/target-parallel-do-collapse.f90:   !$omp target parallel do map(from:array) collapse(2)
+./test/offloading/fortran/target-parallel-do-collapse.f90:    !$omp end target parallel do
+./test/offloading/fortran/target-map-nested-dtype-complex-member.f90:!$omp target map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%j2, top_dtype%nested%array_i2, top_dtype%l)
+./test/offloading/fortran/target-map-nested-dtype-complex-member.f90:!$omp end target
+./test/offloading/fortran/target-map-derived-type-full-implicit-2.f90:  !$omp target
+./test/offloading/fortran/target-map-derived-type-full-implicit-2.f90:  !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array-2.f90:    !$omp target enter data map(to: array)
+./test/offloading/fortran/target-map-enter-exit-array-2.f90:   !$omp target
+./test/offloading/fortran/target-map-enter-exit-array-2.f90:  !$omp end target
+./test/offloading/fortran/target-map-enter-exit-array-2.f90:  !$omp target exit data map(from: array)
+./test/offloading/fortran/basic-target-parallel-do.f90:   !$omp target parallel do map(from: x)
+./test/offloading/fortran/basic-target-parallel-do.f90:   !$omp end target parallel do
+./test/offloading/fortran/target-map-dtype-multi-explicit-member.f90:    !$omp target map(from:scalar_struct%rx, scalar_struct%ry)
+./test/offloading/fortran/target-map-dtype-multi-explicit-member.f90:    !$omp end target
+./test/offloading/fortran/target-map-dynamic.f90:    !$omp target enter data map(to: A)
+./test/offloading/fortran/target-map-dynamic.f90:    !$omp target
+./test/offloading/fortran/target-map-dynamic.f90:    !$omp end target
+./test/offloading/fortran/target-map-dynamic.f90:    !$omp target exit data map(from: A)
+./test/offloading/fortran/target_map_common_block1.f90:  !$omp target map(tofrom:devices) map(tofrom:var1)
+./test/offloading/fortran/target_map_common_block1.f90:  !$omp end target
+./test/offloading/fortran/target-with-threadprivate.f90:!$omp threadprivate(pointer2)
+./test/offloading/fortran/target-with-threadprivate.f90:!$omp target
+./test/offloading/fortran/target-with-threadprivate.f90:!$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:devices)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var1)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var2)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var3)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(to:var4) map(from:tmp)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom: var6)
+./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array.f90:!$omp target map(tofrom: alloca_dtype, alloca_dtype%array_j)
+./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array.f90:!$omp end target
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp target map(from: top_dtype%nested%nest%j4, top_dtype%nested%nest%i4, top_dtype%nested%nest%k4) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(from: top_dtype%array_i, top_dtype%nested%nest2%i3, top_dtype%nested%i2) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(from: top_dtype%nested%nest2%k3, top_dtype%nested%nest2%j3) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%nested%nest%j4, top_dtype2%nested%nest%i4, top_dtype2%nested%nest%k4) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%array_i, top_dtype2%nested%nest2%i3, top_dtype2%nested%i2) &
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%nested%nest2%k3, top_dtype2%nested%nest2%j3)
+./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp end target
+./test/offloading/fortran/target-map-derived-type-full-implicit-1.f90:  !$omp target map(from:out)
+./test/offloading/fortran/target-map-derived-type-full-implicit-1.f90:  !$omp end target
+./test/offloading/fortran/target-map-dtype-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%array_j(2:6))
+./test/offloading/fortran/target-map-dtype-alloca-array-with-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-single-member.f90:!$omp target map(tofrom: top_dtype%nested%array_i2)
+./test/offloading/fortran/target-map-nested-dtype-single-member.f90:!$omp end target
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:    !$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:    !$omp end target
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp end target
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp target map(tofrom: copy)
+./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp end target
+./test/offloading/fortran/target-map-nested-dtype-derived-member.f90:!$omp target map(tofrom: top_dtype%k, top_dtype%nested2%array_i2, top_dtype%nested)
+./test/offloading/fortran/target-map-nested-dtype-derived-member.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-alloca-dtype-alloca-array-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(2:6))
+./test/offloading/fortran/target-map-nested-alloca-dtype-alloca-array-bounds.f90:!$omp end target
+./test/offloading/fortran/basic-target-region-1D-array-section.f90:    !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(to:i,j)
+./test/offloading/fortran/basic-target-region-1D-array-section.f90:    !$omp end target
+./test/offloading/fortran/target-map-two-nested-dtype-member-array-map.f90:  !$omp target map(tofrom:scalar_arr1%nested%array_z(3:6), scalar_arr1%nested%array_ix(3:6), scalar_arr2%nested%array_z(3:6), scalar_arr2%nested%array_ix(3:6))
+./test/offloading/fortran/target-map-two-nested-dtype-member-array-map.f90:  !$omp end target
+./test/offloading/fortran/target-custom-mapper.f90:   !$omp declare mapper(my_mapper1 : mytype :: t) map(to: t%data(1 : n))
+./test/offloading/fortran/target-custom-mapper.f90:   !$omp declare mapper(my_mapper2 : mytype2 :: t) map(mapper(my_mapper1): t%my_data)
+./test/offloading/fortran/target-custom-mapper.f90:   !$omp target map(tofrom: sum_device) map(mapper(my_mapper2) : obj)
+./test/offloading/fortran/target-custom-mapper.f90:   !$omp end target
+./test/offloading/fortran/target-map-nested-dtype-3d-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-nested-dtype-3d-alloca-array-with-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-allocatable-array-section-3d-bounds.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
+./test/offloading/fortran/target-map-allocatable-array-section-3d-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-alloca-array-and-non-alloca-dtype.f90:    !$omp target map(tofrom: one_l%nest%nest2, one_l%nest%array_k)
+./test/offloading/fortran/target-map-nested-dtype-alloca-array-and-non-alloca-dtype.f90:    !$omp end target
+./test/offloading/fortran/target-map-individual-dtype-member-map.f90:  !$omp target map(from:scalar_struct%rx)
+./test/offloading/fortran/target-map-individual-dtype-member-map.f90:  !$omp end target
+./test/offloading/fortran/target-map-allocatable-dtype.f90:!$omp target map(tofrom: alloca_dtype)
+./test/offloading/fortran/target-map-allocatable-dtype.f90:!$omp end target
+./test/offloading/fortran/target-map-nested-dtype-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(2:6))
+./test/offloading/fortran/target-map-nested-dtype-alloca-array-with-bounds.f90:!$omp end target
+./test/offloading/fortran/implicit-record-field-mapping.f90:  !$omp target map(from: dst_sum)
+./test/offloading/fortran/implicit-record-field-mapping.f90:  !$omp end target
+./test/offloading/fortran/target-has-device-addr3.f90:  !$omp target data map(tofrom: x) use_device_addr(x)
+./test/offloading/fortran/target-has-device-addr3.f90:  !$omp target has_device_addr(x) map(tofrom: y)
+./test/offloading/fortran/target-has-device-addr3.f90:  !$omp end target
+./test/offloading/fortran/target-has-device-addr3.f90:  !$omp end target data
+./test/offloading/fortran/target-map-double-nested-dtype-array-bounds.f90:!$omp target map(tofrom: top_dtype%nested%array_i2(4:8), top_dtype2%nested%array_j2(4:8))
+./test/offloading/fortran/target-map-double-nested-dtype-array-bounds.f90:!$omp end target
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: arg_alloc)
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: local_alloc)
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: map_ptr)
+./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
+./test/offloading/fortran/target-map-enter-exit-scalar.f90:    !$omp target enter data map(to: scalar)
+./test/offloading/fortran/target-map-enter-exit-scalar.f90:   !$omp target
+./test/offloading/fortran/target-map-enter-exit-scalar.f90:   !$omp end target
+./test/offloading/fortran/target-map-enter-exit-scalar.f90:  !$omp target exit data map(from: scalar)
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp target map(tofrom: top_dtype%nested%nest%i4, top_dtype%nested%array_j2) &
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp map(tofrom: top_dtype%nested%nest%k4, top_dtype%array_i, top_dtype%nested%nest2%i3) &
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp map(tofrom: top_dtype%nested%i2, top_dtype%nested%nest2%j3, top_dtype%array_j)
+./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp end target
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target enter data map(alloc:       &
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp  chunk%tiles(2)%field%density0)
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp end target
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target exit data map(from:         &
+./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp  chunk%tiles(2)%field%density0)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%test)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%test, alloca_dtype%vertexes(N2)%test)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%test_tile(N1)%field%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%test_tile(N1)%field%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom:  alloca_dtype%test_tile(N1)%field%test, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N2)%field%test, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N1)%field%vertexy, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N2)%field%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom:  alloca_dtype%test_tile(N2)%field%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N1)%vertexy, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N1)%vertexy, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(4)%vertexy, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(4)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexx, &
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexy)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype_arr(N2)%array_i)
+./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
+./test/offloading/fortran/target_update.f90:!$omp target enter data map(to:x, device_id)
+./test/offloading/fortran/target_update.f90:!$omp target
+./test/offloading/fortran/target_update.f90:!$omp end target
+./test/offloading/fortran/target_update.f90:!$omp target
+./test/offloading/fortran/target_update.f90:!$omp end target
+./test/offloading/fortran/target_update.f90:!$omp target update from(x, device_id)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:    !$omp declare target link(/var_common/)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:  !$omp target map(tofrom: var2)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:  !$omp end target
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: copy)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: /var_common/)
+./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-alloca-array.f90:!$omp target map(tofrom: alloca_dtype%array_j)
+./test/offloading/fortran/target-map-alloca-dtype-alloca-array.f90:!$omp end target
+./test/offloading/fortran/target-map-alloca-dtype-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
+./test/offloading/fortran/target-map-alloca-dtype-array-of-dtype.f90:!$omp end target
+./test/Inputs/basic_array.f90:    !$omp declare target

>From d31fe2a406c6df0e5b393654afee10d7f62c9626 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Fri, 11 Apr 2025 13:03:53 -0500
Subject: [PATCH 4/6] some more tests

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |    8 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  102 +-
 llvm/out                                      | 2705 +++++++++++++++++
 .../Frontend/OpenMPIRBuilderTest.cpp          |    2 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |    6 +-
 offload/out                                   |  424 ---
 6 files changed, 2795 insertions(+), 452 deletions(-)
 create mode 100644 llvm/out
 delete mode 100644 offload/out

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index dc5721125cccd..008731aa41b23 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1602,12 +1602,16 @@ class OpenMPIRBuilder {
 
   /// Creates the buffer needed for scan reduction.
   /// \param ScanVars Scan Variables.
-  void emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars,
+  ///
+  /// \return error if any produced, else return success.
+  Error emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars,
                                      ArrayRef<llvm::Type *> ScanVarsType);
 
   /// Copies the result back to the reduction variable.
   /// \param ReductionInfos Array type containing the ReductionOps.
-  void emitScanBasedDirectiveFinalsIR(
+  ///
+  /// \return error if any produced, else return success.
+  Error emitScanBasedDirectiveFinalsIR(
       SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
 
   /// This function emits a helper that gathers Reduce lists from the first
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 394bf28bc2574..4f63030a2a269 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4006,10 +4006,43 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
     bool IsInclusive) {
   if (ScanInfo.OMPFirstScanLoop) {
     Builder.restoreIP(AllocaIP);
-    emitScanBasedDirectiveDeclsIR(ScanVars, ScanVarsType);
+    llvm::Error Err = emitScanBasedDirectiveDeclsIR(ScanVars, ScanVarsType);
+    if (Err) {
+      return Err;
+    }
   }
   if (!updateToLocation(Loc))
     return Loc.IP;
+  
+  // Allocate temporary buffer by master thread 
+  auto BodyGenCB = [&](InsertPointTy AllocaIP,
+                       InsertPointTy CodeGenIP) -> Error {
+
+    Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
+    for (int i = 0; i < ScanVars.size(); i++) {
+      Type* IntPtrTy = Builder.getInt32Ty();
+      Constant* allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]);
+      allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy);
+      llvm::Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], allocsize,AllocSpan,nullptr, "arr");
+      Builder.CreateStore(Buff, ScanInfo.ReductionVarToScanBuffs[ScanVars[i]]);
+    }
+    return Error::success();
+  };
+  // TODO: Perform finalization actions for variables. This has to be
+  // called for variables which have destructors/finalizers.
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+  llvm::Value *FilterVal = Builder.getInt32(0);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
+  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+  if (!AfterIP)
+    return AfterIP.takeError();
+
   unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
   llvm::Value *IV = ScanInfo.IV;
 
@@ -4070,33 +4103,53 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
   return Builder.saveIP();
 }
 
-void OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
+Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
     ArrayRef<Value *> ScanVars, ArrayRef<Type *> ScanVarsType) {
-
-  Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
-  for (int i = 0; i < ScanVars.size(); i++) {
-    llvm::Value *Buff = Builder.CreateAlloca(ScanVarsType[i], AllocSpan, "vla");
-    ScanInfo.ReductionVarToScanBuffs[ScanVars[i]] = Buff;
+ 
+  for(int i=0; i<ScanVars.size(); i++){
+   llvm::Value *Buff = Builder.CreateAlloca(Builder.getPtrTy(),1); 
+   ScanInfo.ReductionVarToScanBuffs[ScanVars[i]] = Buff;
   }
+  return Error::success();
 }
 
-void OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
+Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
     SmallVector<ReductionInfo> ReductionInfos) {
-  unsigned int DefaultAS = M.getDataLayout().getProgramAddressSpace();
-  for (ReductionInfo RedInfo : ReductionInfos) {
-    Value *PrivateVar = RedInfo.PrivateVariable;
-    Value *OrigVar = RedInfo.Variable;
-    Value *Buff = ScanInfo.ReductionVarToScanBuffs[PrivateVar];
-
-    Type *SrcTy = RedInfo.ElementType;
-    Value *Val =
-        Builder.CreateInBoundsGEP(SrcTy, Buff, ScanInfo.Span, "arrayOffset");
-    Value *Src = Builder.CreateLoad(SrcTy, Val);
-    Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-        OrigVar, SrcTy->getPointerTo(DefaultAS));
+  auto BodyGenCB = [&](InsertPointTy AllocaIP,
+                       InsertPointTy CodeGenIP) -> Error {
+    unsigned int DefaultAS = M.getDataLayout().getProgramAddressSpace();
+    for (ReductionInfo RedInfo : ReductionInfos) {
+      Value *PrivateVar = RedInfo.PrivateVariable;
+      Value *OrigVar = RedInfo.Variable;
+      Value *Buff = ScanInfo.ReductionVarToScanBuffs[PrivateVar];
+
+      Type *SrcTy = RedInfo.ElementType;
+      Value *Val =
+          Builder.CreateInBoundsGEP(SrcTy, Buff, ScanInfo.Span, "arrayOffset");
+      Value *Src = Builder.CreateLoad(SrcTy, Val);
+      Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
+          OrigVar, SrcTy->getPointerTo(DefaultAS));
 
-    Builder.CreateStore(Src, Dest);
-  }
+      Builder.CreateStore(Src, Dest);
+      Builder.CreateFree(Buff);
+    }
+    return Error::success();
+  };
+  // TODO: Perform finalization actions for variables. This has to be
+  // called for variables which have destructors/finalizers.
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+  llvm::Value *FilterVal = Builder.getInt32(0);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
+  //AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+  //if (!AfterIP)
+  //  return AfterIP.takeError();
+  return Error::success();
 }
 
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
@@ -4209,7 +4262,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
     return AfterIP.takeError();
   Builder.restoreIP(*AfterIP);
   Builder.restoreIP(FinalizeIP);
-  emitScanBasedDirectiveFinalsIR(ReductionInfos);
+  Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos);
+  if (Err) {
+    return Err;
+  }
   FinalizeIP = Builder.saveIP();
 
   return AfterIP;
diff --git a/llvm/out b/llvm/out
new file mode 100644
index 0000000000000..e912287141024
--- /dev/null
+++ b/llvm/out
@@ -0,0 +1,2705 @@
+bindings/ocaml/llvm/llvm.mli:(** [build_malloc ty name b] creates an [malloc]
+bindings/ocaml/llvm/llvm.mli:val build_malloc : lltype -> string -> llbuilder -> llvalue
+bindings/ocaml/llvm/llvm.mli:(** [build_array_malloc ty val name b] creates an [array malloc]
+bindings/ocaml/llvm/llvm.mli:val build_array_malloc : lltype -> llvalue -> string -> llbuilder -> llvalue
+bindings/ocaml/llvm/llvm_ocaml.c:  void **Temp = malloc(sizeof(void *) * Length);
+bindings/ocaml/llvm/llvm_ocaml.c:    value *DiagnosticContext = malloc(sizeof(value));
+bindings/ocaml/llvm/llvm_ocaml.c:value llvm_build_malloc(value Ty, value Name, value B) {
+bindings/ocaml/llvm/llvm_ocaml.c:value llvm_build_array_malloc(value Ty, value Val, value Name, value B) {
+bindings/ocaml/llvm/llvm.ml:external build_malloc : lltype -> string -> llbuilder -> llvalue =
+bindings/ocaml/llvm/llvm.ml:    "llvm_build_malloc"
+bindings/ocaml/llvm/llvm.ml:external build_array_malloc : lltype -> llvalue -> string -> llbuilder ->
+bindings/ocaml/llvm/llvm.ml:    llvalue = "llvm_build_array_malloc"
+bindings/ocaml/llvm/llvm_ocaml.h:   the representation described above and returns a malloc'd array
+lib/MC/MCDisassembler/Disassembler.h:  // FIXME: using std::unique_ptr<const llvm::Target> causes a malloc error
+lib/Frontend/OpenMP/OMPIRBuilder.cpp:        StringRef("malloc"),
+lib/LTO/LTO.cpp:  // example, if the library exported a symbol like __malloc_hot_cold the linker
+lib/IR/IRBuilder.cpp:  // malloc(type) becomes:
+lib/IR/IRBuilder.cpp:  //       i8* malloc(typeSize)
+lib/IR/IRBuilder.cpp:  // malloc(type, arraySize) becomes:
+lib/IR/IRBuilder.cpp:  //       i8* malloc(typeSize*arraySize)
+lib/IR/IRBuilder.cpp:      AllocSize = CreateMul(ArraySize, AllocSize, "mallocsize");
+lib/IR/IRBuilder.cpp:  assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
+lib/IR/IRBuilder.cpp:    // prototype malloc as "void *malloc(size_t)"
+lib/IR/IRBuilder.cpp:    MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy);
+lib/IR/DataLayout.cpp:  // malloc it, then use placement new.
+lib/IR/DataLayout.cpp:  StructLayout *L = (StructLayout *)safe_malloc(
+lib/IR/Core.cpp:      safe_malloc(MFEs.size() * sizeof(LLVMOpaqueModuleFlagEntry)));
+lib/IR/Core.cpp:                                              safe_malloc(MVEs.size() * sizeof(LLVMOpaqueValueMetadataEntry)));
+lib/ProfileData/MemProfReader.cpp:          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
+lib/ProfileData/MemProfReader.cpp:  return Filename == "memprof_malloc_linux.cpp" ||
+lib/Demangle/ItaniumDemangle.cpp:    char* NewMeta = static_cast<char *>(std::malloc(AllocSize));
+lib/Demangle/ItaniumDemangle.cpp:    BlockMeta* NewMeta = reinterpret_cast<BlockMeta*>(std::malloc(NBytes));
+lib/ExecutionEngine/IntelJITProfiling/ittnotify_config.h:    h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+lib/ExecutionEngine/IntelJITProfiling/ittnotify_config.h:    h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+lib/ExecutionEngine/IntelJITProfiling/ittnotify_config.h:    h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+lib/ExecutionEngine/IntelJITProfiling/ittnotify_config.h:    h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+lib/ExecutionEngine/IntelJITProfiling/ittnotify_config.h:    h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+lib/ExecutionEngine/IntelJITProfiling/ittnotify_config.h:    h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+lib/ExecutionEngine/IntelJITProfiling/jitprofiling.c:        dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
+lib/ExecutionEngine/IntelJITProfiling/jitprofiling.c:            dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
+lib/ExecutionEngine/Orc/OrcV2CBindings.cpp:      safe_malloc(Symbols.size() * sizeof(LLVMOrcCSymbolFlagsMapPair)));
+lib/ExecutionEngine/Orc/OrcV2CBindings.cpp:      static_cast<LLVMOrcSymbolStringPoolEntryRef *>(safe_malloc(
+lib/ExecutionEngine/Orc/OrcV2CBindings.cpp:  char *TargetTriple = (char *)malloc(Tmp.size() + 1);
+lib/ExecutionEngine/Interpreter/Execution.cpp:  // Avoid malloc-ing zero bytes, use max()...
+lib/ExecutionEngine/Interpreter/Execution.cpp:  void *Memory = safe_malloc(MemToAlloc);
+lib/ExecutionEngine/Interpreter/Execution.cpp:  assert(Result.PointerVal && "Null pointer returned by malloc!");
+lib/Transforms/Instrumentation/MemorySanitizer.cpp:/// poison the shadow of the malloc-ed or alloca-ed memory, load the shadow,
+lib/Transforms/Instrumentation/MemorySanitizer.cpp:// early at program startup where malloc-ed memory is more likely to
+lib/Transforms/Instrumentation/AddressSanitizer.cpp:const char kAsanStackMallocNameTemplate[] = "__asan_stack_malloc_";
+lib/Transforms/Instrumentation/AddressSanitizer.cpp:    "__asan_stack_malloc_always_";
+lib/Transforms/Instrumentation/AddressSanitizer.cpp:  // Don't do dynamic alloca or stack malloc if:
+lib/Transforms/Instrumentation/AddressSanitizer.cpp:      //     ? __asan_stack_malloc_N(LocalStackSize)
+lib/Transforms/Instrumentation/AddressSanitizer.cpp:      // void *FakeStack = __asan_stack_malloc_N(LocalStackSize);
+lib/Transforms/HipStdPar/HipStdPar.cpp:  {"malloc",                    "__hipstdpar_malloc"},
+lib/Transforms/HipStdPar/HipStdPar.cpp:  {"__builtin_malloc",          "__hipstdpar_malloc"},
+lib/Transforms/HipStdPar/HipStdPar.cpp:  {"__libc_malloc",             "__hipstdpar_malloc"},
+lib/Transforms/HipStdPar/HipStdPar.cpp:  if (auto F = M.getFunction("__hipstdpar_hidden_malloc")) {
+lib/Transforms/HipStdPar/HipStdPar.cpp:        "__libc_malloc", F->getFunctionType(), F->getAttributes());
+lib/Transforms/Coroutines/CoroElide.cpp:  //   mem = coro.alloc(id) ? malloc(coro.size()) : 0;
+lib/Transforms/Coroutines/Coroutines.cpp://   mem = coro.alloc(id) ? malloc(coro.size()) : 0;
+lib/Transforms/InstCombine/InstructionCombining.cpp:  // If we have a malloc call which is only used in any amount of comparisons to
+lib/Transforms/InstCombine/InstructionCombining.cpp:  // outputs of a program (when we convert a malloc to an alloca, the fact that
+lib/Transforms/InstCombine/InstructionCombining.cpp:    // TODO: allow malloc?
+lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp:    // together.  Note that we only do this for alloca's, because malloc should
+lib/Transforms/IPO/OpenMPOpt.cpp:           " malloc calls eligible.";
+lib/Transforms/IPO/OpenMPOpt.cpp:    // Update free call users of found malloc calls.
+lib/Transforms/IPO/OpenMPOpt.cpp:    // Only consider malloc calls executed by a single thread with a constant.
+lib/Transforms/IPO/OpenMPOpt.cpp:  /// Collection of all malloc calls in a function.
+lib/Transforms/IPO/FunctionAttrs.cpp:/// Tests whether a function is "malloc-like".
+lib/Transforms/IPO/FunctionAttrs.cpp:/// A function is "malloc-like" if it returns either null or a pointer that
+lib/Transforms/IPO/AttributorAttributes.cpp:        "Number of malloc/calloc/aligned_alloc calls converted to allocas");
+lib/Transforms/IPO/AttributorAttributes.cpp:      LLVM_DEBUG(dbgs() << "H2S: Removing malloc-like call: " << *AI.CB
+lib/Transforms/IPO/AttributorAttributes.cpp:            Alloca, AI.CB->getType(), "malloc_cast", AI.CB->getIterator());
+lib/Transforms/IPO/AttributorAttributes.cpp:  /// Collection of all malloc-like calls in a function with associated
+lib/Transforms/IPO/AttributorAttributes.cpp:          // A store into the malloc'ed memory is fine.
+lib/Transforms/IPO/AttributorAttributes.cpp:    S += "malloced,";
+lib/Transforms/IPO/AttributorAttributes.cpp:    // TODO: implement case for malloc like instructions
+lib/Transforms/IPO/AttributorAttributes.cpp:    // TODO: update check for malloc like calls
+lib/Transforms/IPO/AttributorAttributes.cpp:    // TODO: add case for malloc like calls
+lib/Transforms/IPO/GlobalOpt.cpp:  // If Dead[n].first is the only use of a malloc result, we can delete its
+lib/Transforms/IPO/GlobalOpt.cpp:/// program as if it always contained the result of the specified malloc.
+lib/Transforms/IPO/GlobalOpt.cpp:/// Because it is always the result of the specified malloc, there is no reason
+lib/Transforms/IPO/GlobalOpt.cpp:/// to actually DO the malloc.  Instead, turn the malloc into a global, and any
+lib/Transforms/IPO/GlobalOpt.cpp:  // of the malloc value, not of the null initializer value (consider a use
+lib/Transforms/IPO/GlobalOpt.cpp:  // that compares the global's value against zero to see if the malloc has
+lib/Transforms/IPO/GlobalOpt.cpp:  // happen after the malloc.
+lib/Transforms/IPO/GlobalOpt.cpp:  // We can't optimize this if the malloc itself is used in a complex way,
+lib/Transforms/IPO/GlobalOpt.cpp:  // malloc to be stored into the specified global, loaded, gep, icmp'd.
+lib/Transforms/Utils/BuildLibCalls.cpp:    Changed |= setAllocFamily(F, "malloc");
+lib/Transforms/Utils/BuildLibCalls.cpp:  case LibFunc_malloc:
+lib/Transforms/Utils/BuildLibCalls.cpp:  case LibFunc_vec_malloc:
+lib/Transforms/Utils/BuildLibCalls.cpp:    Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_malloc ? "vec_malloc"
+lib/Transforms/Utils/BuildLibCalls.cpp:                                                                  : "malloc");
+lib/Transforms/Utils/BuildLibCalls.cpp:    Changed |= setAllocFamily(F, "malloc");
+lib/Transforms/Utils/BuildLibCalls.cpp:        F, TheLibFunc == LibFunc_vec_realloc ? "vec_malloc" : "malloc");
+lib/Transforms/Utils/BuildLibCalls.cpp:    Changed |= setAllocFamily(F, "malloc");
+lib/Transforms/Utils/BuildLibCalls.cpp:    Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? "vec_malloc"
+lib/Transforms/Utils/BuildLibCalls.cpp:                                                                  : "malloc");
+lib/Transforms/Utils/BuildLibCalls.cpp:    Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? "vec_malloc"
+lib/Transforms/Utils/BuildLibCalls.cpp:                                                                : "malloc");
+lib/Transforms/Utils/BuildLibCalls.cpp:  case LibFunc_malloc:
+lib/Transforms/Utils/BuildLibCalls.cpp:  if (!isLibFuncEmittable(M, TLI, LibFunc_malloc))
+lib/Transforms/Utils/BuildLibCalls.cpp:  StringRef MallocName = TLI->getName(LibFunc_malloc);
+lib/Transforms/Utils/BuildLibCalls.cpp:      getOrInsertLibFunc(M, *TLI, LibFunc_malloc, B.getPtrTy(), SizeTTy);
+lib/Transforms/Utils/SimplifyLibCalls.cpp:// Currently this is supported by the open source version of tcmalloc, see:
+lib/Transforms/Utils/SimplifyLibCalls.cpp:// https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h
+lib/Transforms/Scalar/DeadStoreElimination.cpp:  /// If we have a zero initializing memset following a call to malloc,
+lib/Transforms/Scalar/DeadStoreElimination.cpp:        Func != LibFunc_malloc)
+lib/Transforms/Scalar/DeadStoreElimination.cpp:    // Gracefully handle malloc with unexpected memory attributes.
+lib/Transforms/Scalar/DeadStoreElimination.cpp:      // of malloc block
+lib/Transforms/Scalar/DeadStoreElimination.cpp:    // Can we form a calloc from a memset/malloc pair?
+lib/Support/ErrorHandling.cpp:  // If exceptions are enabled, make OOM in malloc look like OOM in new.
+lib/Support/RWMutex.cpp:    static_cast<pthread_rwlock_t*>(safe_malloc(sizeof(pthread_rwlock_t)));
+lib/Support/regexec.c:#define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
+lib/Support/Windows/Process.inc:#include <malloc.h>
+lib/Support/Windows/Program.inc:#include <malloc.h>
+lib/Support/regcomp.c:	sop *strip;		/* malloced strip */
+lib/Support/regcomp.c:	sopno ssize;		/* malloced strip size (allocated) */
+lib/Support/regcomp.c:	sopno slen;		/* malloced strip length (used) */
+lib/Support/regcomp.c:	/* do the mallocs early so failure handling is easy */
+lib/Support/regcomp.c:	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
+lib/Support/regcomp.c:	g->must = malloc((size_t)g->mlen + 1);
+lib/Support/regengine.inc:			m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
+lib/Support/regengine.inc:				m->lastpos = (const char **)malloc((g->nplus+1) *
+lib/Support/MemoryBuffer.cpp:  // We use malloc() and manually handle it returning null instead of calling
+lib/Support/MemoryBuffer.cpp:  // deallocated with a call to free() due to needing to use malloc() in
+lib/Support/MemoryBuffer.cpp:      static_cast<char *>(std::malloc(N + sizeof(size_t) + NameRef.size() + 1));
+lib/Support/MemoryBuffer.cpp:  // We use a call to malloc() rather than a call to a non-throwing operator
+lib/Support/MemoryBuffer.cpp:  char *Mem = static_cast<char *>(std::malloc(RealLen));
+lib/Support/Unix/Process.inc:#include <malloc.h>
+lib/Support/Unix/Process.inc:#include <malloc_np.h>
+lib/Support/Unix/Process.inc:#include <malloc/malloc.h>
+lib/Support/Unix/Process.inc:  malloc_statistics_t Stats;
+lib/Support/Unix/Process.inc:  malloc_zone_statistics(malloc_default_zone(), &Stats);
+lib/Support/Unix/Process.inc:#warning Cannot get malloc info on this platform
+lib/Support/Unix/Signals.inc:  AltStack.ss_sp = static_cast<char *>(safe_malloc(AltStackSize));
+lib/Support/regex2.h:	sop *strip;		/* malloced area for strip */
+lib/Support/rpmalloc/rpnew.h:// This library provides a cross-platform lock free thread caching malloc
+lib/Support/rpmalloc/rpnew.h:#include <rpmalloc.h>
+lib/Support/rpmalloc/rpnew.h:  return rpmalloc(size);
+lib/Support/rpmalloc/rpnew.h:  return rpmalloc(size);
+lib/Support/rpmalloc/rpnew.h:  return rpmalloc(size);
+lib/Support/rpmalloc/rpnew.h:  return rpmalloc(size);
+lib/Support/rpmalloc/CACHE.md:rpmalloc has a thread cache of free memory blocks which can be used in allocations without interfering with other threads or going to system to map more memory, as well as a global cache shared by all threads to let spans of memory pages flow between threads. Configuring the size of these caches can be crucial to obtaining good performance while minimizing memory overhead blowup. Below is a simple case study using the benchmark tool to compare different thread cache configurations for rpmalloc.
+lib/Support/rpmalloc/CACHE.md:The rpmalloc thread cache is configured to be unlimited, performance oriented as meaning default values, size oriented where both thread cache and global cache is reduced significantly, or disabled where both thread and global caches are disabled and completely free pages are directly unmapped.
+lib/Support/rpmalloc/CACHE.md:The benchmark also includes results for the standard library malloc implementation as a reference for comparison with the nocache setting.
+lib/Support/rpmalloc/CACHE.md:The nocache setting still outperforms the reference standard library allocator for workloads up to 6 threads while maintaining a near zero memory overhead, which is even slightly lower than the standard library. For use case scenarios where number of allocation of each size class is lower the overhead in rpmalloc from the 64KiB span size will of course increase.
+lib/Support/rpmalloc/rpmalloc.c://===---------------------- rpmalloc.c ------------------*- C -*-=============//
+lib/Support/rpmalloc/rpmalloc.c:// This library provides a cross-platform lock free thread caching malloc
+lib/Support/rpmalloc/rpmalloc.c:#include "rpmalloc.h"
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_memcpy_const(x, y, s)                                        \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s)
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_memset_const(x, y, s)                                        \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s)
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_memset_const(x, y, s) memset(x, y, s)
+lib/Support/rpmalloc/rpmalloc.c:#define rpmalloc_assume(cond) __builtin_assume(cond)
+lib/Support/rpmalloc/rpmalloc.c:#define rpmalloc_assume(cond)                                                  \
+lib/Support/rpmalloc/rpmalloc.c:#define rpmalloc_assume(cond) __assume(cond)
+lib/Support/rpmalloc/rpmalloc.c:#define rpmalloc_assume(cond) 0
+lib/Support/rpmalloc/rpmalloc.c://! Override standard library malloc/free and new/delete entry points
+lib/Support/rpmalloc/rpmalloc.c:#define rpmalloc_assert(truth, message)                                        \
+lib/Support/rpmalloc/rpmalloc.c:#define rpmalloc_assert(truth, message)                                        \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_add(counter, value)                                     \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_add64(counter, value)                                   \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_add_peak(counter, value, peak)                          \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_sub(counter, value)                                     \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_inc_alloc(heap, class_idx)                              \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_inc_free(heap, class_idx)                               \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_inc(counter)                                            \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_dec(counter)                                            \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_add(counter, value)                                     \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_add64(counter, value)                                   \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_add_peak(counter, value, peak)                          \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_sub(counter, value)                                     \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_inc_alloc(heap, class_idx)                              \
+lib/Support/rpmalloc/rpmalloc.c:#define _rpmalloc_stat_inc_free(heap, class_idx)                               \
+lib/Support/rpmalloc/rpmalloc.c:static int _rpmalloc_initialized;
+lib/Support/rpmalloc/rpmalloc.c:static uintptr_t _rpmalloc_main_thread_id;
+lib/Support/rpmalloc/rpmalloc.c:static rpmalloc_config_t _memory_config;
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_initialize();
+lib/Support/rpmalloc/rpmalloc.c:extern void rpmalloc_set_main_thread(void);
+lib/Support/rpmalloc/rpmalloc.c:void rpmalloc_set_main_thread(void) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_main_thread_id = get_thread_id();
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_spin(void) {
+lib/Support/rpmalloc/rpmalloc.c:static void NTAPI _rpmalloc_thread_destructor(void *value) {
+lib/Support/rpmalloc/rpmalloc.c:  // If this is called on main thread it means rpmalloc_finalize
+lib/Support/rpmalloc/rpmalloc.c:  if (get_thread_id() == _rpmalloc_main_thread_id)
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_thread_finalize(1);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_set_name(void *address, size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_mmap(size_t size, size_t *offset) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size");
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift),
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_unmap(void *address, size_t size, size_t offset,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(!release || (release >= size), "Invalid unmap size");
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(!release || (release >= _memory_page_size),
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size");
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_mmap_os(size_t size, size_t *offset) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
+lib/Support/rpmalloc/rpmalloc.c:        return _rpmalloc_mmap_os(size, offset);
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert(ptr, "Failed to map virtual memory block");
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert((prm == 0), "Failed to promote the page to THP");
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_set_name(ptr, size + padding);
+lib/Support/rpmalloc/rpmalloc.c:        return _rpmalloc_mmap_os(size, offset);
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert((ptr != MAP_FAILED) && ptr,
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_add(&_mapped_pages_os,
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(final_padding <= _memory_span_size,
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(final_padding <= padding, "Internal failure in padding");
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(!(final_padding % 8), "Internal failure in padding");
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert((size < _memory_span_size) ||
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(release || (offset == 0), "Invalid unmap size");
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(!release || (release >= _memory_page_size),
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size");
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(0, "Failed to unmap virtual memory block");
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert(0, "Failed to unmap virtual memory block");
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert(0, "Failed to madvise virtual memory block as free");
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master,
+lib/Support/rpmalloc/rpmalloc.c:static span_t *_rpmalloc_global_get_reserved_spans(size_t span_count) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master,
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_global_set_reserved_spans(span_t *master, span_t *reserve,
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_double_link_list_add(span_t **head, span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_double_link_list_pop_head(span_t **head,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(*head == span, "Linked list corrupted");
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_double_link_list_remove(span_t **head,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(*head, "Linked list corrupted");
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_finalize(heap_t *heap);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER),
+lib/Support/rpmalloc/rpmalloc.c:static span_t *_rpmalloc_span_map_from_reserve(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
+lib/Support/rpmalloc/rpmalloc.c:static size_t _rpmalloc_span_align_count(size_t span_count) {
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count,
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_unmap(span_t *span);
+lib/Support/rpmalloc/rpmalloc.c:static span_t *_rpmalloc_span_map_aligned_count(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:  size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
+lib/Support/rpmalloc/rpmalloc.c:  span_t *span = (span_t *)_rpmalloc_mmap(
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_inc(&_master_spans);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_mark_as_subspan_unless_master(
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_cache_insert(heap, heap->span_reserve);
+lib/Support/rpmalloc/rpmalloc.c:      // held by caller, _rpmalloc_span_map
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1,
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_span_mark_as_subspan_unless_master(
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_span_unmap(_memory_global_reserve);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans,
+lib/Support/rpmalloc/rpmalloc.c:static span_t *_rpmalloc_span_map(heap_t *heap, size_t span_count) {
+lib/Support/rpmalloc/rpmalloc.c:    return _rpmalloc_span_map_from_reserve(heap, span_count);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_spin();
+lib/Support/rpmalloc/rpmalloc.c:      span = _rpmalloc_global_get_reserved_spans(reserve_count);
+lib/Support/rpmalloc/rpmalloc.c:          _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master,
+lib/Support/rpmalloc/rpmalloc.c:        // Already marked as subspan in _rpmalloc_global_get_reserved_spans
+lib/Support/rpmalloc/rpmalloc.c:    span = _rpmalloc_span_map_aligned_count(heap, span_count);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_unmap(span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN),
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted");
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add(&_unmapped_master_spans, 1);
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) &&
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_sub(&_master_spans, 1);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_sub(&_unmapped_master_spans, 1);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_unmap(master, unmap_count * _memory_span_size,
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted");
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->span_count == 1, "Invalid span count");
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_cache_insert(heap,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_unmap(span);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(block_count, "Internal failure");
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_span_initialize_new(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->span_count == 1, "Internal failure");
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_span_extract_free_list_deferred(span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:  // the list size Refer to _rpmalloc_deallocate_defer_small_or_medium for
+lib/Support/rpmalloc/rpmalloc.c:static int _rpmalloc_span_is_fully_utilized(span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->free_list_limit <= span->block_count,
+lib/Support/rpmalloc/rpmalloc.c:static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected");
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_dec(&heap->span_use[0].current);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_double_link_list_remove(list_head, span);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_unmap(span);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_global_cache_finalize(global_cache_t *cache) {
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_spin();
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_unmap(cache->span[ispan]);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_unmap(span);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_global_cache_insert_spans(span_t **span,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_spin();
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_unmap(current_span);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_spin();
+lib/Support/rpmalloc/rpmalloc.c:          _rpmalloc_span_unmap(current_span);
+lib/Support/rpmalloc/rpmalloc.c:static size_t _rpmalloc_global_cache_extract_spans(span_t **span,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_spin();
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(span[ispan]->span_count == span_count,
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_deallocate_huge(span_t *);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted");
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_dec(&heap->span_use[0].current);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_heap_cache_insert(heap, span);
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_deallocate_huge(span);
+lib/Support/rpmalloc/rpmalloc.c:        rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE,
+lib/Support/rpmalloc/rpmalloc.c:        rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred);
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_stat_dec(&heap->span_use[idx].current);
+lib/Support/rpmalloc/rpmalloc.c:          _rpmalloc_heap_cache_insert(heap, span);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_unmap(heap_t *heap) {
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_unmap(span);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_unmap(heap->master_heap);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_global_finalize(heap_t *heap) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_finalize(heap);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_unmap(span_cache->span[ispan]);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_unmap(heap);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_unmap(span);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_global_finalize(heap);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_add64(&heap->thread_to_global,
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_add64(&heap->thread_to_global,
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_unmap(span);
+lib/Support/rpmalloc/rpmalloc.c:static span_t *_rpmalloc_heap_thread_cache_extract(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache);
+lib/Support/rpmalloc/rpmalloc.c:static span_t *_rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_cache_adopt_deferred(heap, &span);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+lib/Support/rpmalloc/rpmalloc.c:    span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
+lib/Support/rpmalloc/rpmalloc.c:static span_t *_rpmalloc_heap_reserved_extract(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:    return _rpmalloc_span_map(heap, span_count);
+lib/Support/rpmalloc/rpmalloc.c:static span_t *_rpmalloc_heap_global_cache_extract(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:  span_cache->count = _rpmalloc_global_cache_extract_spans(
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add64(&heap->global_to_thread,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
+lib/Support/rpmalloc/rpmalloc.c:  size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add64(&heap->global_to_thread,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count,
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1,
+lib/Support/rpmalloc/rpmalloc.c:_rpmalloc_heap_extract_new_span(heap_t *heap,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:    span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:    span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:    span = _rpmalloc_heap_global_cache_extract(heap, span_count);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:    span = _rpmalloc_heap_reserved_extract(heap, span_count);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:  span = _rpmalloc_span_map(heap, base_span_count);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_initialize(heap_t *heap) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_memset_const(heap, 0, sizeof(heap_t));
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
+lib/Support/rpmalloc/rpmalloc.c:static heap_t *_rpmalloc_heap_allocate_new(void) {
+lib/Support/rpmalloc/rpmalloc.c:    span = _rpmalloc_global_get_reserved_spans(heap_span_count);
+lib/Support/rpmalloc/rpmalloc.c:    span = (span_t *)_rpmalloc_mmap(block_size, &align_offset);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_inc(&_master_spans);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_initialize(heap);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_initialize(extra_heap);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_orphan(extra_heap, 1);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count);
+lib/Support/rpmalloc/rpmalloc.c:static heap_t *_rpmalloc_heap_extract_orphan(heap_t **heap_list) {
+lib/Support/rpmalloc/rpmalloc.c:static heap_t *_rpmalloc_heap_allocate(int first_class) {
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_spin();
+lib/Support/rpmalloc/rpmalloc.c:    heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps);
+lib/Support/rpmalloc/rpmalloc.c:    heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
+lib/Support/rpmalloc/rpmalloc.c:    heap = _rpmalloc_heap_allocate_new();
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_release(void *heapptr, int first_class,
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+lib/Support/rpmalloc/rpmalloc.c:          _rpmalloc_span_unmap(span_cache->span[ispan]);
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count *
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_span_unmap(span_cache->span[ispan]);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0,
+lib/Support/rpmalloc/rpmalloc.c:  if (get_thread_id() != _rpmalloc_main_thread_id) {
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_spin();
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_orphan(heap, first_class);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_release(heapptr, 0, release_cache);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_release_raw_fc(void *heapptr) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_release_raw(heapptr, 1);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_heap_finalize(heap_t *heap) {
+lib/Support/rpmalloc/rpmalloc.c:    span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_unmap(span);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_unmap(heap->size_class[iclass].cache);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_finalize(heap, iclass, span,
+lib/Support/rpmalloc/rpmalloc.c:      if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
+lib/Support/rpmalloc/rpmalloc.c:          _rpmalloc_span_double_link_list_remove(list, class_span);
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_span_double_link_list_add(
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_unmap(span_cache->span[ispan]);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred),
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_allocate_from_heap_fallback(
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assume(heap != 0);
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(span->block_count ==
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span),
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(span->free_list_limit <= span->block_count,
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_extract_free_list_deferred(span);
+lib/Support/rpmalloc/rpmalloc.c:    if (!_rpmalloc_span_is_fully_utilized(span))
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
+lib/Support/rpmalloc/rpmalloc.c:  span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:    return _rpmalloc_span_initialize_new(heap, heap_size_class, span,
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_allocate_small(heap_t *heap, size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(heap, "No thread heap");
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_inc_alloc(heap, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_allocate_medium(heap_t *heap, size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(heap, "No thread heap");
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_inc_alloc(heap, class_idx);
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_allocate_large(heap_t *heap, size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(heap, "No thread heap");
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->span_count >= span_count, "Internal failure");
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_allocate_huge(heap_t *heap, size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(heap, "No thread heap");
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+lib/Support/rpmalloc/rpmalloc.c:      (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_allocate(heap_t *heap, size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_add64(&_allocation_counter, 1);
+lib/Support/rpmalloc/rpmalloc.c:    return _rpmalloc_allocate_small(heap, size);
+lib/Support/rpmalloc/rpmalloc.c:    return _rpmalloc_allocate_medium(heap, size);
+lib/Support/rpmalloc/rpmalloc.c:    return _rpmalloc_allocate_large(heap, size);
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_allocate_huge(heap, size);
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_aligned_allocate(heap_t *heap, size_t alignment,
+lib/Support/rpmalloc/rpmalloc.c:    return _rpmalloc_allocate(heap, size);
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE),
+lib/Support/rpmalloc/rpmalloc.c:      return _rpmalloc_allocate(heap, multiple_size);
+lib/Support/rpmalloc/rpmalloc.c:    ptr = _rpmalloc_allocate(heap, size + alignment);
+lib/Support/rpmalloc/rpmalloc.c:  span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_add64(&_allocation_counter, 1);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span,
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(heap->owner_thread == get_thread_id() ||
+lib/Support/rpmalloc/rpmalloc.c:  if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_double_link_list_add(
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_double_link_list_remove(
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_span_release_to_cache(heap, span);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate_defer_free_span(span->heap, span);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_inc_free(span->heap, span->size_class);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate_direct_small_or_medium(span, p);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate_defer_small_or_medium(span, p);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_deallocate_large(span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class");
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) ||
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) ||
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate_defer_free_span(span->heap, span);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(heap, "No thread heap");
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert(atomic_load32(&master->remaining_spans) >=
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_cache_insert(heap, span);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_deallocate_huge(span_t *span) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->heap, "No span heap");
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate_defer_free_span(span->heap, span);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset,
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_deallocate(void *p) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_add64(&_deallocation_counter, 1);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate_small_or_medium(span, p);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate_large(span);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate_huge(span);
+lib/Support/rpmalloc/rpmalloc.c:static size_t _rpmalloc_usable_size(void *p);
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_reallocate(heap_t *heap, void *p, size_t size,
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_assert(span->span_count == 1, "Span counter corrupted");
+lib/Support/rpmalloc/rpmalloc.c:  void *block = _rpmalloc_allocate(heap, new_size);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate(p);
+lib/Support/rpmalloc/rpmalloc.c:static void *_rpmalloc_aligned_reallocate(heap_t *heap, void *ptr,
+lib/Support/rpmalloc/rpmalloc.c:    return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
+lib/Support/rpmalloc/rpmalloc.c:  size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0);
+lib/Support/rpmalloc/rpmalloc.c:      (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_deallocate(ptr);
+lib/Support/rpmalloc/rpmalloc.c:static size_t _rpmalloc_usable_size(void *p) {
+lib/Support/rpmalloc/rpmalloc.c:static void _rpmalloc_adjust_size_class(size_t iclass) {
+lib/Support/rpmalloc/rpmalloc.c:        _rpmalloc_memcpy_const(_memory_size_class + prevclass,
+lib/Support/rpmalloc/rpmalloc.c:extern inline int rpmalloc_initialize(void) {
+lib/Support/rpmalloc/rpmalloc.c:  if (_rpmalloc_initialized) {
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_thread_initialize();
+lib/Support/rpmalloc/rpmalloc.c:  return rpmalloc_initialize_config(0);
+lib/Support/rpmalloc/rpmalloc.c:int rpmalloc_initialize_config(const rpmalloc_config_t *config) {
+lib/Support/rpmalloc/rpmalloc.c:  if (_rpmalloc_initialized) {
+lib/Support/rpmalloc/rpmalloc.c:    rpmalloc_thread_initialize();
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_initialized = 1;
+lib/Support/rpmalloc/rpmalloc.c:    memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_memset_const(&_memory_config, 0, sizeof(rpmalloc_config_t));
+lib/Support/rpmalloc/rpmalloc.c:    _memory_config.memory_map = _rpmalloc_mmap_os;
+lib/Support/rpmalloc/rpmalloc.c:    _memory_config.memory_unmap = _rpmalloc_unmap_os;
+lib/Support/rpmalloc/rpmalloc.c:            rpmalloc_assert(!(csize & (csize - 1)) && !(csize % 1024),
+lib/Support/rpmalloc/rpmalloc.c:  if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc))
+lib/Support/rpmalloc/rpmalloc.c:  fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_adjust_size_class(iclass);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_adjust_size_class(iclass);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_linker_reference();
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_thread_initialize();
+lib/Support/rpmalloc/rpmalloc.c:void rpmalloc_finalize(void) {
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_thread_finalize(1);
+lib/Support/rpmalloc/rpmalloc.c:  // rpmalloc_dump_statistics(stdout);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_global_finalize(heap);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected");
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0,
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_initialized = 0;
+lib/Support/rpmalloc/rpmalloc.c:extern inline void rpmalloc_thread_initialize(void) {
+lib/Support/rpmalloc/rpmalloc.c:    heap_t *heap = _rpmalloc_heap_allocate(0);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_stat_inc(&_memory_active_heaps);
+lib/Support/rpmalloc/rpmalloc.c:void rpmalloc_thread_finalize(int release_caches) {
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_release_raw(heap, release_caches);
+lib/Support/rpmalloc/rpmalloc.c:int rpmalloc_is_thread_initialized(void) {
+lib/Support/rpmalloc/rpmalloc.c:const rpmalloc_config_t *rpmalloc_config(void) { return &_memory_config; }
+lib/Support/rpmalloc/rpmalloc.c:extern inline RPMALLOC_ALLOCATOR void *rpmalloc(size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_allocate(heap, size);
+lib/Support/rpmalloc/rpmalloc.c:extern inline void rpfree(void *ptr) { _rpmalloc_deallocate(ptr); }
+lib/Support/rpmalloc/rpmalloc.c:  void *block = _rpmalloc_allocate(heap, total);
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize,
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_aligned_allocate(heap, alignment, size);
+lib/Support/rpmalloc/rpmalloc.c:extern inline size_t rpmalloc_usable_size(void *ptr) {
+lib/Support/rpmalloc/rpmalloc.c:  return (ptr ? _rpmalloc_usable_size(ptr) : 0);
+lib/Support/rpmalloc/rpmalloc.c:extern inline void rpmalloc_thread_collect(void) {}
+lib/Support/rpmalloc/rpmalloc.c:void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) {
+lib/Support/rpmalloc/rpmalloc.c:  memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
+lib/Support/rpmalloc/rpmalloc.c:void rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) {
+lib/Support/rpmalloc/rpmalloc.c:  memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_spin();
+lib/Support/rpmalloc/rpmalloc.c:void rpmalloc_dump_statistics(void *file) {
+lib/Support/rpmalloc/rpmalloc.c:          rpmalloc_assert(
+lib/Support/rpmalloc/rpmalloc.c:          rpmalloc_assert(
+lib/Support/rpmalloc/rpmalloc.c:extern inline rpmalloc_heap_t *rpmalloc_heap_acquire(void) {
+lib/Support/rpmalloc/rpmalloc.c:  // released when heap is cleared with rpmalloc_heap_free_all(). Also heaps
+lib/Support/rpmalloc/rpmalloc.c:  heap_t *heap = _rpmalloc_heap_allocate(1);
+lib/Support/rpmalloc/rpmalloc.c:  rpmalloc_assume(heap != NULL);
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_stat_inc(&_memory_active_heaps);
+lib/Support/rpmalloc/rpmalloc.c:extern inline void rpmalloc_heap_release(rpmalloc_heap_t *heap) {
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_heap_release(heap, 1, 1);
+lib/Support/rpmalloc/rpmalloc.c:rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_allocate(heap, size);
+lib/Support/rpmalloc/rpmalloc.c:rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_aligned_allocate(heap, alignment, size);
+lib/Support/rpmalloc/rpmalloc.c:rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
+lib/Support/rpmalloc/rpmalloc.c:  return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
+lib/Support/rpmalloc/rpmalloc.c:rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
+lib/Support/rpmalloc/rpmalloc.c:  void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
+lib/Support/rpmalloc/rpmalloc.c:rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
+lib/Support/rpmalloc/rpmalloc.c:rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr,
+lib/Support/rpmalloc/rpmalloc.c:  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
+lib/Support/rpmalloc/rpmalloc.c:extern inline void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_deallocate(ptr);
+lib/Support/rpmalloc/rpmalloc.c:extern inline void rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
+lib/Support/rpmalloc/rpmalloc.c:  _rpmalloc_heap_cache_adopt_deferred(heap, 0);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_cache_insert(heap, span);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_cache_insert(heap, span);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_cache_insert(heap, span);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_deallocate_huge(span);
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_heap_cache_insert(heap, span);
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add64(&heap->thread_to_global,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
+lib/Support/rpmalloc/rpmalloc.c:    _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
+lib/Support/rpmalloc/rpmalloc.c:      _rpmalloc_span_unmap(span_cache->span[ispan]);
+lib/Support/rpmalloc/rpmalloc.c:extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
+lib/Support/rpmalloc/rpmalloc.c:      rpmalloc_heap_release(prev_heap);
+lib/Support/rpmalloc/rpmalloc.c:extern inline rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr) {
+lib/Support/rpmalloc/rpmalloc.c:#include "malloc.c"
+lib/Support/rpmalloc/rpmalloc.c:void rpmalloc_linker_reference(void) { (void)sizeof(_rpmalloc_initialized); }
+lib/Support/rpmalloc/README.md:# rpmalloc - General Purpose Memory Allocator
+lib/Support/rpmalloc/README.md:This is a fork of rpmalloc 1.4.5.
+lib/Support/rpmalloc/README.md:We believe rpmalloc is faster than most popular memory allocators like tcmalloc, hoard, ptmalloc3 and others without causing extra allocated memory overhead in the thread caches compared to these allocators. We also believe the implementation to be easier to read and modify compared to these allocators, as it is a single source file of ~3000 lines of C code. All allocations have a natural 16-byte alignment.
+lib/Support/rpmalloc/README.md:https://github.com/mjansson/rpmalloc-benchmark
+lib/Support/rpmalloc/README.md:Below is an example performance comparison chart of rpmalloc and other popular allocator implementations, with default configurations used.
+lib/Support/rpmalloc/README.md:Before calling any other function in the API, you __MUST__ call the initialization function, either __rpmalloc_initialize__ or __rpmalloc_initialize_config__, or you will get undefined behaviour when calling other rpmalloc entry point.
+lib/Support/rpmalloc/README.md:Before terminating your use of the allocator, you __SHOULD__ call __rpmalloc_finalize__ in order to release caches and unmap virtual memory, as well as prepare the allocator for global scope cleanup at process exit or dynamic library unload depending on your use case.
+lib/Support/rpmalloc/README.md:The easiest way to use the library is simply adding __rpmalloc.[h|c]__ to your project and compile them along with your sources. This contains only the rpmalloc specific entry points and does not provide internal hooks to process and/or thread creation at the moment. You are required to call these functions from your own code in order to initialize and finalize the allocator in your process and threads:
+lib/Support/rpmalloc/README.md:__rpmalloc_initialize__ : Call at process start to initialize the allocator
+lib/Support/rpmalloc/README.md:__rpmalloc_initialize_config__ : Optional entry point to call at process start to initialize the allocator with a custom memory mapping backend, memory page size and mapping granularity.
+lib/Support/rpmalloc/README.md:__rpmalloc_finalize__: Call at process exit to finalize the allocator
+lib/Support/rpmalloc/README.md:__rpmalloc_thread_initialize__: Call at each thread start to initialize the thread local data for the allocator
+lib/Support/rpmalloc/README.md:__rpmalloc_thread_finalize__: Call at each thread exit to finalize and release thread cache back to global cache
+lib/Support/rpmalloc/README.md:__rpmalloc_config__: Get the current runtime configuration of the allocator
+lib/Support/rpmalloc/README.md:Then simply use the __rpmalloc__/__rpfree__ and the other malloc style replacement functions. Remember all allocations are 16-byte aligned, so no need to call the explicit rpmemalign/rpaligned_alloc/rpposix_memalign functions unless you need greater alignment, they are simply wrappers to make it easier to replace in existing code.
+lib/Support/rpmalloc/README.md:If you wish to override the standard library malloc family of functions and have automatic initialization/finalization of process and threads, define __ENABLE_OVERRIDE__ to non-zero which will include the `malloc.c` file in compilation of __rpmalloc.c__, and then rebuild the library or your project where you added the rpmalloc source. If you compile rpmalloc as a separate library you must make the linker use the override symbols from the library by referencing at least one symbol. The easiest way is to simply include `rpmalloc.h` in at least one source file and call `rpmalloc_linker_reference` somewhere - it's a dummy empty function. On Windows platforms and C++ overrides you have to `#include <rpnew.h>` in at least one source file and also manually handle the initialize/finalize of the process and all threads. The list of libc entry points replaced may not be complete, use libc/stdc++ replacement only as a convenience for testing the library on an existing code base, not a final solution.
+lib/Support/rpmalloc/README.md:For explicit first class heaps, see the __rpmalloc_heap_*__ API under [first class heaps](#first-class-heaps) section, requiring __RPMALLOC_FIRST_CLASS_HEAPS__ tp be defined to 1.
+lib/Support/rpmalloc/README.md:To compile as a static library run the configure python script which generates a Ninja build script, then build using ninja. The ninja build produces two static libraries, one named `rpmalloc` and one named `rpmallocwrap`, where the latter includes the libc entry point overrides.
+lib/Support/rpmalloc/README.md:The configure + ninja build also produces two shared object/dynamic libraries. The `rpmallocwrap` shared library can be used with LD_PRELOAD/DYLD_INSERT_LIBRARIES to inject in a preexisting binary, replacing any malloc/free family of function calls. This is only implemented for Linux and macOS targets. The list of libc entry points replaced may not be complete, use preloading as a convenience for testing the library on an existing binary, not a final solution. The dynamic library also provides automatic init/fini of process and threads for all platforms.
+lib/Support/rpmalloc/README.md:Free memory pages are cached both per thread and in a global cache for all threads. The size of the thread caches is determined by an adaptive scheme where each cache is limited by a percentage of the maximum allocation count of the corresponding size class. The size of the global caches is determined by a multiple of the maximum of all thread caches. The factors controlling the cache sizes can be set by editing the individual defines in the `rpmalloc.c` source file for fine tuned control.
+lib/Support/rpmalloc/README.md:Detailed statistics are available if __ENABLE_STATISTICS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. This will cause a slight overhead in runtime to collect statistics for each memory operation, and will also add 4 bytes overhead per allocation to track sizes.
+lib/Support/rpmalloc/README.md:Integer safety checks on all calls are enabled if __ENABLE_VALIDATE_ARGS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. If enabled, size arguments to the global entry points are verified not to cause integer overflows in calculations.
+lib/Support/rpmalloc/README.md:Asserts are enabled if __ENABLE_ASSERTS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`.
+lib/Support/rpmalloc/README.md:To include __malloc.c__ in compilation and provide overrides of standard library malloc entry points define __ENABLE_OVERRIDE__ to 1. To enable automatic initialization of finalization of process and threads in order to preload the library into executables using standard library malloc, define __ENABLE_PRELOAD__ to 1.
+lib/Support/rpmalloc/README.md:The allocator has support for huge/large pages on Windows, Linux and MacOS. To enable it, pass a non-zero value in the config value `enable_huge_pages` when initializing the allocator with `rpmalloc_initialize_config`. If the system does not support huge pages it will be automatically disabled. You can query the status by looking at `enable_huge_pages` in the config returned from a call to `rpmalloc_config` after initialization is done.
+lib/Support/rpmalloc/README.md:The allocator is similar in spirit to tcmalloc from the [Google Performance Toolkit](https://github.com/gperftools/gperftools). It uses separate heaps for each thread and partitions memory blocks according to a preconfigured set of size classes, up to 2MiB. Larger blocks are mapped and unmapped directly. Allocations for different size classes will be served from different set of memory pages, each "span" of pages is dedicated to one size class. Spans of pages can flow between threads when the thread cache overflows and are released to a global cache, or when the thread ends. Unlike tcmalloc, single blocks do not flow between threads, only entire spans of pages.
+lib/Support/rpmalloc/README.md:The allocator is based on a fixed but configurable page alignment (defaults to 64KiB) and 16 byte block alignment, where all runs of memory pages (spans) are mapped to this alignment boundary. On Windows this is automatically guaranteed up to 64KiB by the VirtualAlloc granularity, and on mmap systems it is achieved by oversizing the mapping and aligning the returned virtual memory address to the required boundaries. By aligning to a fixed size the free operation can locate the header of the memory span without having to do a table lookup (as tcmalloc does) by simply masking out the low bits of the address (for 64KiB this would be the low 16 bits).
+lib/Support/rpmalloc/README.md:By default the allocator uses OS APIs to map virtual memory pages as needed, either `VirtualAlloc` on Windows or `mmap` on POSIX systems. If you want to use your own custom memory mapping provider you can use __rpmalloc_initialize_config__ and pass function pointers to map and unmap virtual memory. These function should reserve and free the requested number of bytes.
+lib/Support/rpmalloc/README.md:The returned memory address from the memory map function MUST be aligned to the memory page size and the memory span size (which ever is larger), both of which is configurable. Either provide the page and span sizes during initialization using __rpmalloc_initialize_config__, or use __rpmalloc_config__ to find the required alignment which is equal to the maximum of page and span size. The span size MUST be a power of two in [4096, 262144] range, and be a multiple or divisor of the memory page size.
+lib/Support/rpmalloc/README.md:Memory mapping requests are always done in multiples of the memory page size. You can specify a custom page size when initializing rpmalloc with __rpmalloc_initialize_config__, or pass 0 to let rpmalloc determine the system memory page size using OS APIs. The page size MUST be a power of two.
+lib/Support/rpmalloc/README.md:rpmalloc keeps an "active span" and free list for each size class. This leads to back-to-back allocations will most likely be served from within the same span of memory pages (unless the span runs out of free blocks). The rpmalloc implementation will also use any "holes" in memory pages in semi-filled spans before using a completely free span.
+lib/Support/rpmalloc/README.md:rpmalloc provides a first class heap type with explicit heap control API. Heaps are maintained with calls to __rpmalloc_heap_acquire__ and __rpmalloc_heap_release__ and allocations/frees are done with __rpmalloc_heap_alloc__ and __rpmalloc_heap_free__. See the `rpmalloc.h` documentation for the full list of functions in the heap API. The main use case of explicit heap control is to scope allocations in a heap and release everything with a single call to __rpmalloc_heap_free_all__ without having to maintain ownership of memory blocks. Note that the heap API is not thread-safe, the caller must make sure that each heap is only used in a single thread at any given time.
+lib/Support/rpmalloc/README.md:Compared to the some other allocators, rpmalloc does not suffer as much from a producer-consumer thread scenario where one thread allocates memory blocks and another thread frees the blocks. In some allocators the free blocks need to traverse both the thread cache of the thread doing the free operations as well as the global cache before being reused in the allocating thread. In rpmalloc the freed blocks will be reused as soon as the allocating thread needs to get new spans from the thread cache. This enables faster release of completely freed memory pages as blocks in a memory page will not be aliased between different owning threads.
+lib/Support/rpmalloc/README.md:To support global scope data doing dynamic allocation/deallocation such as C++ objects with custom constructors and destructors, the call to __rpmalloc_finalize__ will not completely terminate the allocator but rather empty all caches and put the allocator in finalization mode. Once this call has been made, the allocator is no longer thread safe and expects all remaining calls to originate from global data destruction on main thread. Any spans or heaps becoming free during this phase will be immediately unmapped to allow correct teardown of the process or dynamic library without any leaks.
+lib/Support/rpmalloc/README.md:[Johan Andersson](https://github.com/repi) at Embark has created a Rust wrapper available at [rpmalloc-rs](https://github.com/EmbarkStudios/rpmalloc-rs)
+lib/Support/rpmalloc/README.md:[Stas Denisov](https://github.com/nxrighthere) has created a C# wrapper available at [Rpmalloc-CSharp](https://github.com/nxrighthere/Rpmalloc-CSharp)
+lib/Support/rpmalloc/rpmalloc.h://===---------------------- rpmalloc.h ------------------*- C -*-=============//
+lib/Support/rpmalloc/rpmalloc.h:// This library provides a cross-platform lock free thread caching malloc
+lib/Support/rpmalloc/rpmalloc.h:#define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
+lib/Support/rpmalloc/rpmalloc.h://! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_*
+lib/Support/rpmalloc/rpmalloc.h:typedef struct rpmalloc_global_statistics_t {
+lib/Support/rpmalloc/rpmalloc.h:} rpmalloc_global_statistics_t;
+lib/Support/rpmalloc/rpmalloc.h:typedef struct rpmalloc_thread_statistics_t {
+lib/Support/rpmalloc/rpmalloc.h:} rpmalloc_thread_statistics_t;
+lib/Support/rpmalloc/rpmalloc.h:typedef struct rpmalloc_config_t {
+lib/Support/rpmalloc/rpmalloc.h:  //  aligned to the rpmalloc span size, which will always be a power of two.
+lib/Support/rpmalloc/rpmalloc.h:} rpmalloc_config_t;
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT int rpmalloc_initialize(void);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT int rpmalloc_initialize_config(const rpmalloc_config_t *config);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT const rpmalloc_config_t *rpmalloc_config(void);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_finalize(void);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_thread_initialize(void);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_thread_finalize(int release_caches);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_thread_collect(void);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT int rpmalloc_is_thread_initialized(void);
+lib/Support/rpmalloc/rpmalloc.h:rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats);
+lib/Support/rpmalloc/rpmalloc.h:rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_dump_statistics(void *file);
+lib/Support/rpmalloc/rpmalloc.h:rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
+lib/Support/rpmalloc/rpmalloc.h://  and should ideally be less than memory page size. A caveat of rpmalloc
+lib/Support/rpmalloc/rpmalloc.h://  and should ideally be less than memory page size. A caveat of rpmalloc
+lib/Support/rpmalloc/rpmalloc.h://  and should ideally be less than memory page size. A caveat of rpmalloc
+lib/Support/rpmalloc/rpmalloc.h://  and should ideally be less than memory page size. A caveat of rpmalloc
+lib/Support/rpmalloc/rpmalloc.h://  and should ideally be less than memory page size. A caveat of rpmalloc
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT size_t rpmalloc_usable_size(void *ptr);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_linker_reference(void);
+lib/Support/rpmalloc/rpmalloc.h:typedef struct heap_t rpmalloc_heap_t;
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_heap_acquire(void);
+lib/Support/rpmalloc/rpmalloc.h://! rpmalloc_heap_free_all before destroying the heap).
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_heap_release(rpmalloc_heap_t *heap);
+lib/Support/rpmalloc/rpmalloc.h:rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) RPMALLOC_ATTRIB_MALLOC
+lib/Support/rpmalloc/rpmalloc.h://  size. A caveat of rpmalloc internals is that this must also be strictly less
+lib/Support/rpmalloc/rpmalloc.h:rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
+lib/Support/rpmalloc/rpmalloc.h:rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num,
+lib/Support/rpmalloc/rpmalloc.h://  than memory page size. A caveat of rpmalloc internals is that this must also
+lib/Support/rpmalloc/rpmalloc.h:rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
+lib/Support/rpmalloc/rpmalloc.h:rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
+lib/Support/rpmalloc/rpmalloc.h://  A caveat of rpmalloc internals is that this must also be strictly less than
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *rpmalloc_heap_aligned_realloc(
+lib/Support/rpmalloc/rpmalloc.h:    rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size,
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_heap_free_all(rpmalloc_heap_t *heap);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap);
+lib/Support/rpmalloc/rpmalloc.h:RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr);
+lib/Support/rpmalloc/malloc.c://===------------------------ malloc.c ------------------*- C -*-=============//
+lib/Support/rpmalloc/malloc.c:// This library provides a cross-platform lock free thread caching malloc
+lib/Support/rpmalloc/malloc.c:// This file provides overrides for the standard library malloc entry points for
+lib/Support/rpmalloc/malloc.c:#undef malloc
+lib/Support/rpmalloc/malloc.c:extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL malloc(size_t size) {
+lib/Support/rpmalloc/malloc.c:  return rpmalloc(size);
+lib/Support/rpmalloc/malloc.c:extern inline size_t RPMALLOC_CDECL malloc_usable_size(void *ptr) {
+lib/Support/rpmalloc/malloc.c:  return rpmalloc_usable_size(ptr);
+lib/Support/rpmalloc/malloc.c:extern inline size_t RPMALLOC_CDECL malloc_size(void *ptr) {
+lib/Support/rpmalloc/malloc.c:  return rpmalloc_usable_size(ptr);
+lib/Support/rpmalloc/malloc.c:extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _malloc_base(size_t size) {
+lib/Support/rpmalloc/malloc.c:  return rpmalloc(size);
+lib/Support/rpmalloc/malloc.c:  return rpmalloc_usable_size(ptr);
+lib/Support/rpmalloc/malloc.c:  return rpmalloc_usable_size(ptr);
+lib/Support/rpmalloc/malloc.c:void *RPDEFVIS _Znwm(uint64_t size) { return rpmalloc(size); }
+lib/Support/rpmalloc/malloc.c:void *RPDEFVIS _Znam(uint64_t size) { return rpmalloc(size); }
+lib/Support/rpmalloc/malloc.c:  return rpmalloc(size);
+lib/Support/rpmalloc/malloc.c:  return rpmalloc(size);
+lib/Support/rpmalloc/malloc.c:void *RPDEFVIS _Znwj(uint32_t size) { return rpmalloc(size); }
+lib/Support/rpmalloc/malloc.c:void *RPDEFVIS _Znaj(uint32_t size) { return rpmalloc(size); }
+lib/Support/rpmalloc/malloc.c:  return rpmalloc(size);
+lib/Support/rpmalloc/malloc.c:  return rpmalloc(size);
+lib/Support/rpmalloc/malloc.c:static void *rpmalloc_nothrow(size_t size, rp_nothrow_t t) {
+lib/Support/rpmalloc/malloc.c:  return rpmalloc(size);
+lib/Support/rpmalloc/malloc.c:__attribute__((used)) static const interpose_t macinterpose_malloc[]
+lib/Support/rpmalloc/malloc.c:        MAC_INTERPOSE_PAIR(rpmalloc, _Znwm),
+lib/Support/rpmalloc/malloc.c:        MAC_INTERPOSE_PAIR(rpmalloc, _Znam),
+lib/Support/rpmalloc/malloc.c:        MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnwmRKSt9nothrow_t),
+lib/Support/rpmalloc/malloc.c:        MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnamRKSt9nothrow_t),
+lib/Support/rpmalloc/malloc.c:        MAC_INTERPOSE_PAIR(rpmalloc, malloc),
+lib/Support/rpmalloc/malloc.c:        MAC_INTERPOSE_PAIR(rpmalloc, calloc),
+lib/Support/rpmalloc/malloc.c:        MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size),
+lib/Support/rpmalloc/malloc.c:        MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)};
+lib/Support/rpmalloc/malloc.c:        RPALIAS(rpmalloc) void *_Znam(uint64_t size) RPMALLOC_ATTRIB_MALLOC
+lib/Support/rpmalloc/malloc.c:    RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwmm(uint64_t size,
+lib/Support/rpmalloc/malloc.c:                        RPALIAS(rpmalloc_nothrow) void *_ZnamRKSt9nothrow_t(
+lib/Support/rpmalloc/malloc.c:                            rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void
+lib/Support/rpmalloc/malloc.c:        RPALIAS(rpmalloc) void *_Znaj(uint32_t size) RPMALLOC_ATTRIB_MALLOC
+lib/Support/rpmalloc/malloc.c:    RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwjj(uint32_t size,
+lib/Support/rpmalloc/malloc.c:                        RPALIAS(rpmalloc_nothrow) void *_ZnajRKSt9nothrow_t(
+lib/Support/rpmalloc/malloc.c:                            rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void
+lib/Support/rpmalloc/malloc.c:                            void *malloc(size_t size) RPMALLOC_ATTRIB_MALLOC
+lib/Support/rpmalloc/malloc.c:        RPALIAS(rpmalloc) void *calloc(size_t count, size_t size)
+lib/Support/rpmalloc/malloc.c:    malloc_usable_size(const void *ptr) RPALIAS(rpmalloc_usable_size)
+lib/Support/rpmalloc/malloc.c:    malloc_usable_size(void *ptr) RPALIAS(rpmalloc_usable_size)
+lib/Support/rpmalloc/malloc.c:        size_t malloc_size(void *ptr) RPALIAS(rpmalloc_usable_size)
+lib/Support/rpmalloc/malloc.c:            static inline size_t _rpmalloc_page_size(void) {
+lib/Support/rpmalloc/malloc.c:  return rpaligned_alloc(_rpmalloc_page_size(), size);
+lib/Support/rpmalloc/malloc.c:  const size_t page_size = _rpmalloc_page_size();
+lib/Support/rpmalloc/malloc.c:  return rpaligned_alloc(_rpmalloc_page_size(), aligned_size);
+lib/Support/rpmalloc/malloc.c:    rpmalloc_initialize();
+lib/Support/rpmalloc/malloc.c:    rpmalloc_finalize();
+lib/Support/rpmalloc/malloc.c:    rpmalloc_thread_initialize();
+lib/Support/rpmalloc/malloc.c:    rpmalloc_thread_finalize(1);
+lib/Support/rpmalloc/malloc.c:extern void _global_rpmalloc_init(void) {
+lib/Support/rpmalloc/malloc.c:  rpmalloc_set_main_thread();
+lib/Support/rpmalloc/malloc.c:  rpmalloc_initialize();
+lib/Support/rpmalloc/malloc.c:  _global_rpmalloc_init();
+lib/Support/rpmalloc/malloc.c:static int _global_rpmalloc_xib(void) {
+lib/Support/rpmalloc/malloc.c:  _global_rpmalloc_init();
+lib/Support/rpmalloc/malloc.c:__declspec(allocate(".CRT$XIB")) void (*_rpmalloc_module_init)(void) =
+lib/Support/rpmalloc/malloc.c:    _global_rpmalloc_xib;
+lib/Support/rpmalloc/malloc.c:                        "__rpmalloc_module_init")
+lib/Support/rpmalloc/malloc.c:                        "_rpmalloc_module_init")
+lib/Support/rpmalloc/malloc.c:extern void rpmalloc_set_main_thread(void);
+lib/Support/rpmalloc/malloc.c:  rpmalloc_set_main_thread();
+lib/Support/rpmalloc/malloc.c:  rpmalloc_initialize();
+lib/Support/rpmalloc/malloc.c:static void __attribute__((destructor)) finalizer(void) { rpmalloc_finalize(); }
+lib/Support/rpmalloc/malloc.c:  rpmalloc_thread_initialize();
+lib/Support/rpmalloc/malloc.c:  rpmalloc_thread_finalize(1);
+lib/Support/rpmalloc/malloc.c:  rpmalloc_initialize();
+lib/Support/rpmalloc/malloc.c:  thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg));
+lib/Support/rpmalloc/malloc.c:  rpmalloc_thread_initialize();
+lib/Support/rpmalloc/malloc.c:  thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg));
+lib/Support/rpmalloc/malloc.c:void *__libc_malloc(size_t size) RPMALLOC_ATTRIB_MALLOC
+lib/Support/rpmalloc/malloc.c:        RPALIAS(rpmalloc) void *__libc_calloc(size_t count, size_t size)
+lib/Support/SmallVector.cpp:  void *NewEltsReplace = llvm::safe_malloc(NewCapacity * TSize);
+lib/Support/SmallVector.cpp:void *SmallVectorBase<Size_T>::mallocForGrow(void *FirstEl, size_t MinSize,
+lib/Support/SmallVector.cpp:  // capacity 0, it's possible for the malloc to return FirstEl.
+lib/Support/SmallVector.cpp:  void *NewElts = llvm::safe_malloc(NewCapacity * TSize);
+lib/Support/SmallVector.cpp:    NewElts = llvm::safe_malloc(NewCapacity * TSize);
+lib/Support/CMakeLists.txt:    message(FATAL_ERROR "Cannot find the path to `git clone` for the CRT allocator! (${LLVM_INTEGRATED_CRT_ALLOC}). Currently, rpmalloc, snmalloc and mimalloc are supported.")
+lib/Support/CMakeLists.txt:  if((LLVM_INTEGRATED_CRT_ALLOC MATCHES "rpmalloc$") OR LLVM_ENABLE_RPMALLOC)
+lib/Support/CMakeLists.txt:    set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/rpmalloc/rpmalloc.c")
+lib/Support/CMakeLists.txt:    set(delayload_flags "${delayload_flags} ${WL}-INCLUDE:malloc")
+lib/Support/CMakeLists.txt:  elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "snmalloc$")
+lib/Support/CMakeLists.txt:    set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/src/snmalloc/override/new.cc")
+lib/Support/CMakeLists.txt:    set(system_libs ${system_libs} "mincore.lib" "${WL}-INCLUDE:malloc")
+lib/Support/CMakeLists.txt:  elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "mimalloc$")
+lib/Support/CMakeLists.txt:    set(MIMALLOC_LIB "${LLVM_INTEGRATED_CRT_ALLOC}/out/msvc-x64/Release/mimalloc-static.lib")
+lib/Support/CMakeLists.txt:	  message(FATAL_ERROR "Cannot find the mimalloc static library. To build it, first apply the patch from https://github.com/microsoft/mimalloc/issues/268 then build the Release x64 target through ${LLVM_INTEGRATED_CRT_ALLOC}\\ide\\vs2019\\mimalloc.sln")
+lib/Support/CMakeLists.txt:    set(system_libs ${system_libs} "${MIMALLOC_LIB}" "${WL}-INCLUDE:malloc")
+lib/Support/CMakeLists.txt:  if(LLVM_INTEGRATED_CRT_ALLOC MATCHES "snmalloc$")
+lib/Support/SmallPtrSet.cpp:  CurArray = (const void**)safe_malloc(sizeof(void*) * CurArraySize);
+lib/Support/SmallPtrSet.cpp:  const void **NewBuckets = (const void**) safe_malloc(sizeof(void*) * NewSize);
+lib/Support/SmallPtrSet.cpp:    CurArray = (const void**)safe_malloc(sizeof(void*) * that.CurArraySize);
+lib/Support/SmallPtrSet.cpp:      CurArray = (const void**)safe_malloc(sizeof(void*) * RHS.CurArraySize);
+lib/Object/Object.cpp:  char *str = static_cast<char*>(safe_malloc(ret.size()));
+lib/Target/X86/X86ISelLowering.cpp:  // If stacklet is not large enough, jump to mallocMBB
+lib/Target/X86/X86ISelLowering.cpp:  // mallocMBB:
+lib/Target/X86/X86ISelLowering.cpp:  MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+lib/Target/X86/X86ISelLowering.cpp:  Register mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
+lib/Target/X86/X86ISelLowering.cpp:  MF->insert(MBBIter, mallocMBB);
+lib/Target/X86/X86ISelLowering.cpp:  // and if so, jump to mallocMBB otherwise to bumpMBB.
+lib/Target/X86/X86ISelLowering.cpp:  BuildMI(BB, MIMD, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
+lib/Target/X86/X86ISelLowering.cpp:    BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI)
+lib/Target/X86/X86ISelLowering.cpp:    BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
+lib/Target/X86/X86ISelLowering.cpp:    BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI)
+lib/Target/X86/X86ISelLowering.cpp:    BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
+lib/Target/X86/X86ISelLowering.cpp:    BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
+lib/Target/X86/X86ISelLowering.cpp:    BuildMI(mallocMBB, MIMD, TII->get(X86::PUSH32r)).addReg(sizeVReg);
+lib/Target/X86/X86ISelLowering.cpp:    BuildMI(mallocMBB, MIMD, TII->get(X86::CALLpcrel32))
+lib/Target/X86/X86ISelLowering.cpp:    BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
+lib/Target/X86/X86ISelLowering.cpp:  BuildMI(mallocMBB, MIMD, TII->get(TargetOpcode::COPY), mallocPtrVReg)
+lib/Target/X86/X86ISelLowering.cpp:  BuildMI(mallocMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
+lib/Target/X86/X86ISelLowering.cpp:  BB->addSuccessor(mallocMBB);
+lib/Target/X86/X86ISelLowering.cpp:  mallocMBB->addSuccessor(continueMBB);
+lib/Target/X86/X86ISelLowering.cpp:      .addReg(mallocPtrVReg)
+lib/Target/X86/X86ISelLowering.cpp:      .addMBB(mallocMBB)
+lib/Target/README.txt:result doesn't point to anything (like malloc).  One example of this is in
+lib/Target/README.txt:  %64 = call noalias i8* @malloc(i64 %62) nounwind
+lib/Target/README.txt:llvm.objectsize.i64 should be taught about malloc/calloc, allowing it to
+lib/Target/README.txt:fold to %62.  This is a security win (overflows of malloc will get caught)
+lib/Target/README.txt:  char *p = malloc(strlen(s)+1);
+lib/Target/README.txt:  %alloc = call noalias i8* @malloc(i32 %x) nounwind
+lib/Target/README.txt:aggressively as malloc though.
+lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp:  // The reason we include malloc/free here is to exclude the malloc/free
+lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp:  if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free")
+lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp://    work-group, does a "malloc" and stores the pointer of the
+lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp:  // Create malloc block.
+lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp:  // work item which will branch to malloc block.
+lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp:  // Create a call to malloc function which does device global memory allocation
+lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp:      StringRef("__asan_malloc_impl"),
+lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp:  // Create store of malloc to new global
+lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp:  // Load malloc pointer from Sw LDS.
+grep: lib/Target/AMDGPU/.AMDGPUSwLowerLDS.cpp.swp: binary file matches
+lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp:  // dependent on malloc'd pointer values).
+lib/CodeGen/LiveIntervalUnion.cpp:      safe_malloc(sizeof(LiveIntervalUnion)*NSize));
+lib/CodeGen/MachineBlockPlacement.cpp:  /// a function. To reduce malloc traffic, we allocate them using this
+lib/CodeGen/RegAllocBasic.cpp:  // Scratch space.  Allocated here to avoid repeated malloc calls in
+lib/Analysis/Loads.cpp:  // Note that it is not safe to speculate into a malloc'd region because
+lib/Analysis/Loads.cpp:  // malloc may return null.
+lib/Analysis/CaptureTracking.cpp:      // captures. This allows us to ignore comparisons of malloc results
+lib/Analysis/InstructionSimplify.cpp:  // allocas might be transformed into calls to malloc not simultaneously
+lib/Analysis/InstructionSimplify.cpp:  // library (and, thus, could be malloc'ed by the implementation).
+lib/Analysis/InstructionSimplify.cpp:    // cannot be elided. We cannot fold malloc comparison to null. Also, the
+lib/Analysis/TargetLibraryInfo.cpp:    TLI.setAvailable(llvm::LibFunc_malloc);
+lib/Analysis/TargetLibraryInfo.cpp:  // These vec_malloc/free routines are only available on AIX.
+lib/Analysis/TargetLibraryInfo.cpp:    TLI.setUnavailable(LibFunc_vec_malloc);
+lib/Analysis/ValueTracking.cpp:    // Alloca never returns null, malloc might.
+lib/Analysis/MemoryBuiltins.cpp:    return "malloc";
+lib/Analysis/MemoryBuiltins.cpp:    return "vec_malloc";
+lib/Analysis/MemoryBuiltins.cpp:  // Name of default allocator function to group malloc/free calls by family
+lib/Analysis/MemoryBuiltins.cpp:/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
+lib/Analysis/MemoryBuiltins.cpp:/// allocates memory similar to malloc or calloc.
+lib/Analysis/MemoryBuiltins.cpp:/// allocates memory (either malloc, calloc, or strdup like).
+lib/Analysis/MemoryBuiltins.cpp:  // malloc are uninitialized (undef)
+lib/Analysis/MemoryBuiltins.cpp:  // Name of default allocator function to group malloc/free calls by family
+lib/Analysis/LazyValueInfo.cpp:/// alloca or a malloc call for which a comparison against a constant can
+lib/Analysis/AliasAnalysis.cpp:// alias, a global cannot alias a malloc, two different mallocs cannot alias,
+lib/Analysis/BasicAliasAnalysis.cpp:  //     char *p = (char*)malloc(100)
+lib/Analysis/BasicAliasAnalysis.cpp:  // If the call is malloc/calloc like, we can assume that it doesn't
+utils/llvm.grm:MemoryInst ::= malloc Types OptCAlign
+utils/llvm.grm: | malloc Types ^ "," INTTYPE ValueRef OptCAlign
+utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn:    "asan_malloc_linux.cpp",
+utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn:    "asan_malloc_mac.cpp",
+utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn:    "asan_malloc_win.cpp",
+utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn:        "-Wl,-U,___sanitizer_malloc_hook",
+utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn:        "asan_malloc_win_thunk.cpp",
+utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn:    "tsan_malloc_mac.cpp",
+utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn:      "-Wl,-U,___sanitizer_malloc_hook",
+utils/gn/secondary/compiler-rt/lib/lsan/BUILD.gn:    "lsan_malloc_mac.cpp",
+utils/gn/secondary/compiler-rt/lib/ubsan/BUILD.gn:      "-Wl,-U,___sanitizer_malloc_hook",
+utils/gn/secondary/compiler-rt/lib/hwasan/BUILD.gn:    "hwasan_malloc_bisect.h",
+utils/gn/secondary/clang/lib/Headers/BUILD.gn:    "mm_malloc.h",
+utils/gn/secondary/clang/lib/Headers/BUILD.gn:    "ppc_wrappers/mm_malloc.h",
+utils/gn/secondary/libcxxabi/src/BUILD.gn:  "fallback_malloc.cpp",
+utils/vscode/llvm/syntaxes/ll.tmLanguage.yaml:            \\bmalloc\\b|\
+utils/lit/lit/llvm/config.py:        use_gmalloc = lit_config.params.get("use_gmalloc", None)
+utils/lit/lit/llvm/config.py:        if lit.util.pythonize_bool(use_gmalloc):
+utils/lit/lit/llvm/config.py:            # Allow use of an explicit path for gmalloc library.
+utils/lit/lit/llvm/config.py:            # Will default to '/usr/lib/libgmalloc.dylib' if not set.
+utils/lit/lit/llvm/config.py:            gmalloc_path_str = lit_config.params.get(
+utils/lit/lit/llvm/config.py:                "gmalloc_path", "/usr/lib/libgmalloc.dylib"
+utils/lit/lit/llvm/config.py:            if gmalloc_path_str is not None:
+utils/lit/lit/llvm/config.py:                self.with_environment("DYLD_INSERT_LIBRARIES", gmalloc_path_str)
+grep: utils/lit/lit/llvm/__pycache__/config.cpython-310.pyc: binary file matches
+utils/valgrind/x86_64-pc-linux-gnu.supp:   fun:malloc
+utils/valgrind/x86_64-pc-linux-gnu.supp:   fun:xmalloc
+utils/valgrind/x86_64-pc-linux-gnu.supp:   fun:malloc
+utils/valgrind/x86_64-pc-linux-gnu.supp:   fun:malloc
+utils/valgrind/i386-pc-linux-gnu.supp:   fun:malloc
+utils/vim/syntax/llvm.vim:syn keyword llvmStatement malloc max min mul nand ne ninf nnan nsw nsz nuw oeq
+utils/emacs/llvm-mode.el:   `(,(regexp-opt '("malloc" "alloca" "free" "load" "store" "getelementptr" "fence" "cmpxchg" "atomicrmw") 'symbols) . font-lock-keyword-face)
+docs/GarbageCollection.rst:collector or building atop ``malloc`` are great places to start, and can be
+docs/ProgrammersManual.rst:avoids (relatively) expensive malloc/free calls, which dwarf the cost of adding
+docs/ProgrammersManual.rst:array is the cost of the new/delete (aka malloc/free).  Also note that if you
+docs/ProgrammersManual.rst:dynamically smaller than N, no malloc is performed.  This can be a big win in
+docs/ProgrammersManual.rst:cases where the malloc/free call is far more expensive than the code that
+docs/ProgrammersManual.rst:no malloc traffic is required) and accesses them with a simple linear search.
+docs/ProgrammersManual.rst:malloc traffic.
+docs/ProgrammersManual.rst:inserted (thus it is very malloc intensive) and typically stores three pointers
+docs/ProgrammersManual.rst:and malloc traffic is not a big deal, but if the elements of the set are small,
+docs/ProgrammersManual.rst:produces a lot of malloc traffic.  It should be avoided.
+docs/ProgrammersManual.rst:they are generally very expensive (each insertion requires a malloc).
+docs/ProgrammersManual.rst:they are generally very expensive (each insertion requires a malloc).
+docs/Reference.rst:  A library that implements a security-hardened `malloc()`.
+docs/tutorial/MyFirstLanguageFrontend/LangImpl10.rst:   memory, either with calls to the standard libc malloc/free interface
+docs/CommandLine.rst:   naturally), ``ld`` style `prefix`_ options (to parse '``-lmalloc
+docs/CommandLine.rst:  arguments like ``-lmalloc`` and ``-L/usr/lib`` in a linker tool or
+docs/LangRef.rst:    allocator function, that is "malloc" for malloc/calloc/realloc/free,
+docs/LangRef.rst:    ``::operator::delete``. Matching malloc/realloc/free calls within a family
+docs/GwpAsan.rst:implementation of ``malloc()``, ``free()`` and ``realloc()``. The stubs are
+docs/GwpAsan.rst:The stubs follow the same general pattern (example ``malloc()`` pseudocode
+docs/GwpAsan.rst:  void* YourAllocator::malloc(..) {
+docs/Coroutines.rst:    %alloc = call ptr @malloc(i32 %size)
+docs/Coroutines.rst:    %alloc = call noalias ptr @malloc(i32 24)
+docs/Coroutines.rst:    %alloc = call ptr @malloc(i32 %size)
+docs/Coroutines.rst:    %alloc = call ptr @malloc(i32 %size)
+docs/MemTagSanitizer.rst:allocated memory in malloc(), as long as a pointer with the matching
+docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt:> I still have some major concerns about including malloc and free in the
+docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt:malloc/free are either built in functions or actual opcodes.  They provide
+docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt:implementation may want to override the default malloc/free behavior of
+docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt:the program.  To do this, they simply implement a "malloc" and
+docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt:defined malloc/free function (which return/take void*'s, not type'd
+docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt:fall back on a system malloc/free.
+docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt:>  problems with malloc listed above.
+docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt:-lmalloc -lcpc -lm -ldl
+docs/HistoricalNotes/2001-02-09-AdveComments.txt:o  I still have some major concerns about including malloc and free in the
+docs/HistoricalNotes/2001-02-09-AdveComments.txt:   C, C++ Java and Fortran 90 would not be able to use our malloc anyway
+docs/HistoricalNotes/2001-02-09-AdveComments.txt:   Having a single malloc would just not suffice, and instead would simply
+docs/HistoricalNotes/2001-02-09-AdveComments.txt:   Instead, providing a default library version of malloc and free
+docs/HistoricalNotes/2001-02-09-AdveComments.txt:   (and perhaps a malloc_gc with garbage collection instead of free)
+docs/HistoricalNotes/2001-02-09-AdveComments.txt:   problems with malloc listed above.
+docs/WritingAnLLVMPass.rst:converts ``malloc`` and ``free`` instructions into platform dependent
+docs/WritingAnLLVMPass.rst:``malloc()`` and ``free()`` function calls.  It uses the ``doInitialization``
+docs/WritingAnLLVMPass.rst:method to get a reference to the ``malloc`` and ``free`` functions that it
+docs/AMDGPUUsage.rst:                                                      that conforms to the requirements of the malloc/free device library V1
+docs/AMDGPUUsage.rst:                                                       buffer that conforms to the requirements of the malloc/free
+docs/LibFuzzer.rst:``-malloc_limit_mb``
+docs/LibFuzzer.rst:  number of Mb with one malloc call.
+docs/LibFuzzer.rst:``malloc`` and ``free`` calls when executing every mutation.
+docs/ScudoHardenedAllocator.rst:- the allocation type (malloc, new, new[] or memalign), to detect potential
+docs/ScudoHardenedAllocator.rst:jemalloc).
+docs/ScudoHardenedAllocator.rst:|                                 |                | malloc/delete, new/free, new/delete[], etc.     |
+docs/ScudoHardenedAllocator.rst:|                                 |                | the scudo_malloc_set_track_allocation_stacks    |
+docs/CMake.rst:  midly improves Clang build times, by about 5-10%. At the moment, rpmalloc,
+docs/CMake.rst:  snmalloc and mimalloc are supported. Use the path to `git clone` to select
+docs/CMake.rst:    $ D:\git> git clone https://github.com/mjansson/rpmalloc
+docs/CMake.rst:    $ D:\llvm-project> cmake ... -DLLVM_INTEGRATED_CRT_ALLOC=D:\git\rpmalloc
+docs/CMake.rst:  Note that rpmalloc is also supported natively in-tree, see option below.
+docs/CMake.rst:  Similar to LLVM_INTEGRATED_CRT_ALLOC, embeds the in-tree rpmalloc into the
+docs/CMake.rst:  rpmalloc 1.4.5. This option also implies linking with the static CRT, there's
+grep: out: input file is also the output
+test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll:; We used to introduce stack mallocs for UAR detection, but that makes LLVM run
+test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll:; out of registers on 32-bit platforms. Therefore, we don't do stack malloc on
+test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll:; CHECK-NOT: call {{.*}} @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll:; Don't do stack malloc on functions containing inline assembly on 64-bit
+test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll:; CHECK-NOT: call {{.*}} @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/asan-funclet.ll:; CHECK-INLINE-NEXT:    [[TMP5:%.*]] = call i64 @__asan_stack_malloc_8(i64 8544)
+test/Instrumentation/AddressSanitizer/asan-funclet.ll:; CHECK-OUTLINE-NEXT:    [[TMP5:%.*]] = call i64 @__asan_stack_malloc_8(i64 8608)
+test/Instrumentation/AddressSanitizer/asan-funclet.ll:; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @__asan_stack_malloc_0(i64 64)
+test/Instrumentation/AddressSanitizer/fake-stack.ll:; RUNTIME-NEXT:    [[TMP3:%.*]] = call i64 @__asan_stack_malloc_0(i64 64)
+test/Instrumentation/AddressSanitizer/fake-stack.ll:; ALWAYS-NEXT:    [[TMP0:%.*]] = call i64 @__asan_stack_malloc_always_0(i64 64)
+test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll:; CHECK-RUNTIME: [[FAKE_STACK_RT:%[0-9]+]] = call i64 @__asan_stack_malloc_
+test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll:; CHECK-ALWAYS: [[FAKE_STACK_RT:%[0-9]+]] = call i64 @__asan_stack_malloc_always_
+test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll:; CHECK-NOT: __asan_stack_malloc
+test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll:; CHECK: call i64 @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll:; CHECK: call i64 @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll:; CHECK-NOT: __asan_stack_malloc_0
+test/Instrumentation/AddressSanitizer/debug-info-alloca.ll:; CHECK:     %3 = call i64 @__asan_stack_malloc_0(i64 64){{$}}
+test/Instrumentation/AddressSanitizer/asan-stack-safety.ll:  ; NOSAFETY: call i64 @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/asan-stack-safety.ll:  ; NOSAFETY: call i64 @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/asan-stack-safety.ll:  ; CHECK: call i64 @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/asan-stack-safety.ll:  ; NOSAFETY: call i64 @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/asan-stack-safety.ll:  ; NOSAFETY: call i64 @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/asan-stack-safety.ll:; NOSAFETY: call i64 @__asan_stack_malloc
+test/Instrumentation/AddressSanitizer/stack-poisoning.ll:; RUN: opt < %s -passes=asan -asan-use-after-return=never -S | FileCheck --check-prefix=CHECK-PLAIN --implicit-check-not=__asan_stack_malloc %s
+test/Instrumentation/AddressSanitizer/stack-poisoning.ll:; CHECK-UAR-RUNTIME: call i64 @__asan_stack_malloc_4
+test/Instrumentation/AddressSanitizer/stack-poisoning.ll:; CHECK-UAR-ALWAYS: call i64 @__asan_stack_malloc_always_4
+test/Instrumentation/RealtimeSanitizer/rtsan.ll:  %2 = call ptr @malloc(i64 noundef 2) #3
+test/Instrumentation/RealtimeSanitizer/rtsan.ll:declare ptr @malloc(i64 noundef) #1
+test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll:declare noalias ptr @malloc(i64) nounwind allocsize(0)
+test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll:; CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll:; CHECK-NEXT:    [[TMP3:%.*]] = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll:; CHECK-NEXT:    [[TMP5:%.*]] = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll:  %1 = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll:  %2 = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll:  %3 = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/simple.ll:declare noalias ptr @malloc(i64) nounwind allocsize(0)
+test/Instrumentation/BoundsChecking/simple.ll:; CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/simple.ll:  %1 = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/simple.ll:; CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @malloc(i64 32)
+test/Instrumentation/BoundsChecking/simple.ll:  %1 = tail call ptr @malloc(i64 32)
+test/MC/ELF/relax-branch.s:	callq	sqlite3_mallocPLT
+test/Verifier/memprof-metadata-bad.ll:  %call1 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !0
+test/Verifier/memprof-metadata-bad.ll:  %call2 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !1
+test/Verifier/memprof-metadata-bad.ll:  %call3 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !3
+test/Verifier/memprof-metadata-bad.ll:  %call4 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !5
+test/Verifier/memprof-metadata-bad.ll:  %call5 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !7, !callsite !9
+test/Verifier/memprof-metadata-bad.ll:declare dso_local noalias noundef ptr @malloc(i64 noundef)
+test/Verifier/memprof-metadata-good.ll:  %call = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !0, !callsite !5
+test/Verifier/memprof-metadata-good.ll:declare dso_local noalias noundef ptr @malloc(i64 noundef)
+test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll:	br i1 false, label %cond_true.i, label %sre_malloc.exit
+test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll:sre_malloc.exit:		; preds = %bb
+test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll:cond_true:		; preds = %cond_true66, %cond_true, %sre_malloc.exit
+test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll:	%tmp59 = phi i32 [ 1, %sre_malloc.exit ], [ %phitmp, %cond_true66 ], [ %tmp59, %cond_true ]		; <i32> [#uses=2]
+test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll:	%indvar245.0.ph = phi i32 [ 0, %sre_malloc.exit ], [ %indvar.next246, %cond_true66 ], [ %indvar245.0.ph, %cond_true ]		; <i32> [#uses=2]
+test/Transforms/PGOProfile/ppc-prevent-mma-types.ll:declare ptr @__malloc()
+test/Transforms/PGOProfile/ppc-prevent-mma-types.ll:  %i61 = call ptr @__malloc()
+grep: test/Transforms/PGOProfile/Inputs/memprof.exe: binary file matches
+grep: test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe: binary file matches
+grep: test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe: binary file matches
+grep: test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe: binary file matches
+test/Transforms/PGOProfile/chr_coro.ll:declare noalias ptr @malloc(i32)
+test/Transforms/PGOProfile/chr_coro.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i32 24)
+test/Transforms/PGOProfile/chr_coro.ll:  %alloc = call ptr @malloc(i32 24)
+test/Transforms/DeadStoreElimination/noop-stores.ll:declare noalias ptr @malloc(i64)
+test/Transforms/DeadStoreElimination/noop-stores.ll:define ptr @zero_memset_after_malloc(i64 %size) {
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-LABEL: @zero_memset_after_malloc(
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %call = call ptr @malloc(i64 %size) inaccessiblememonly
+test/Transforms/DeadStoreElimination/noop-stores.ll:define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering(
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7:[0-9]+]]
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %call = call ptr @malloc(i64 %size) inaccessiblememonly
+test/Transforms/DeadStoreElimination/noop-stores.ll:define ptr @zero_memset_after_malloc_with_different_sizes(i64 %size) {
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes(
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7]]
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %call = call ptr @malloc(i64 %size) inaccessiblememonly
+test/Transforms/DeadStoreElimination/noop-stores.ll:define ptr @notmalloc_memset(i64 %size, ptr %notmalloc) {
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-LABEL: @notmalloc_memset(
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %call1 = call ptr %notmalloc(i64 %size)
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias align 16 ptr @malloc(i64 [[MUL]])
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %call = tail call noalias align 16 ptr @malloc(i64 %mul)
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %call = call ptr @malloc(i64 %size) inaccessiblememonly
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %call = call ptr @malloc(i64 %size) inaccessiblememonly
+test/Transforms/DeadStoreElimination/noop-stores.ll:define ptr @malloc_with_no_nointer_null_check(i64 %0, i32 %1) {
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-LABEL: @malloc_with_no_nointer_null_check(
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR7]]
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %call = call ptr @malloc(i64 %0) inaccessiblememonly
+test/Transforms/DeadStoreElimination/noop-stores.ll:; TODO: This could be replaced with a call to malloc + memset_pattern16.
+test/Transforms/DeadStoreElimination/noop-stores.ll:define ptr @test_malloc_memset_to_calloc(ptr %0) {
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-LABEL: @test_malloc_memset_to_calloc(
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %3 = tail call ptr @malloc(i64 %2) inaccessiblememonly
+test/Transforms/DeadStoreElimination/noop-stores.ll:define ptr @readnone_malloc() {
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-LABEL: @readnone_malloc(
+test/Transforms/DeadStoreElimination/noop-stores.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR8:[0-9]+]]
+test/Transforms/DeadStoreElimination/noop-stores.ll:  %alloc = call ptr @malloc(i64 16) memory(none)
+test/Transforms/DeadStoreElimination/malloc-to-calloc-with-nonzero-default-as.ll:define ptr addrspace(4) @malloc_to_calloc(i64 %size) {
+test/Transforms/DeadStoreElimination/malloc-to-calloc-with-nonzero-default-as.ll:; CHECK-LABEL: define ptr addrspace(4) @malloc_to_calloc(
+test/Transforms/DeadStoreElimination/malloc-to-calloc-with-nonzero-default-as.ll:  %ret = call ptr addrspace(4) @malloc(i64 %size)
+test/Transforms/DeadStoreElimination/malloc-to-calloc-with-nonzero-default-as.ll:declare noalias ptr addrspace(4) @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc"
+test/Transforms/DeadStoreElimination/malloc-earliest-escape-info-invalidation.ll:; CHECK-NEXT:    [[CALL1:%.*]] = tail call noalias ptr @malloc(i64 0)
+test/Transforms/DeadStoreElimination/malloc-earliest-escape-info-invalidation.ll:  %call = tail call ptr @malloc(i64 1)
+test/Transforms/DeadStoreElimination/malloc-earliest-escape-info-invalidation.ll:  %call1 = tail call noalias ptr @malloc(i64 0)
+test/Transforms/DeadStoreElimination/malloc-earliest-escape-info-invalidation.ll:declare ptr @malloc(i64)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:declare noalias ptr @malloc(i64) #0
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:; CHECK-NEXT:    [[M:%.*]] = call noalias ptr @malloc(i64 10)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:  %m = call noalias ptr @malloc(i64 10)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:; CHECK-NEXT:    [[M:%.*]] = call noalias ptr @malloc(i64 10)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:  %m = call noalias ptr @malloc(i64 10)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:; CHECK-NEXT:    [[M:%.*]] = call noalias ptr @malloc(i64 10)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:  %m = call noalias ptr @malloc(i64 10)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:; CHECK-NEXT:    [[M:%.*]] = call noalias ptr @malloc(i64 10)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:  %m = call noalias ptr @malloc(i64 10)
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(24) ptr @malloc(i64 24) #[[ATTR7:[0-9]+]]
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:  %call = tail call dereferenceable_or_null(24) ptr @malloc(i64 24) #4
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(16) ptr @malloc(i64 16) #[[ATTR7]]
+test/Transforms/DeadStoreElimination/multiblock-malloc-free.ll:  %call = tail call dereferenceable_or_null(16) ptr @malloc(i64 16) #4
+test/Transforms/DeadStoreElimination/wrong-malloc-size.ll:; malloc should have i64 argument under default data layout
+test/Transforms/DeadStoreElimination/wrong-malloc-size.ll:declare noalias ptr @malloc(i32)
+test/Transforms/DeadStoreElimination/wrong-malloc-size.ll:define ptr @malloc_and_memset_intrinsic(i32 %n) {
+test/Transforms/DeadStoreElimination/wrong-malloc-size.ll:; CHECK-LABEL: @malloc_and_memset_intrinsic(
+test/Transforms/DeadStoreElimination/wrong-malloc-size.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i32 [[N:%.*]])
+test/Transforms/DeadStoreElimination/wrong-malloc-size.ll:  %call = call ptr @malloc(i32 %n)
+test/Transforms/DeadStoreElimination/operand-bundles.ll:declare noalias ptr @malloc(i64) "malloc-like"
+test/Transforms/DeadStoreElimination/operand-bundles.ll:  %obj = call ptr @malloc(i64 8)
+test/Transforms/DeadStoreElimination/operand-bundles.ll:  %obj = call ptr @malloc(i64 8)
+test/Transforms/DeadStoreElimination/operand-bundles.ll:  %obj = call ptr @malloc(i64 8)
+test/Transforms/DeadStoreElimination/memcpy-lifetimes.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(192) ptr @malloc(i64 192)
+test/Transforms/DeadStoreElimination/memcpy-lifetimes.ll:  %call = tail call dereferenceable_or_null(192) ptr @malloc(i64 192) #8
+test/Transforms/DeadStoreElimination/memcpy-lifetimes.ll:declare noalias ptr @malloc(i64)
+test/Transforms/DeadStoreElimination/free.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized")
+test/Transforms/DeadStoreElimination/free.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/free.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/free.ll:; CHECK-NEXT:    [[ALLOC1:%.*]] = tail call noalias ptr @malloc(i64 4) [[ATTR0]]
+test/Transforms/DeadStoreElimination/free.ll:  %alloc1 = tail call noalias ptr @malloc(i64 4) nounwind
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:declare noalias ptr @malloc(i64)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define void @test_malloc_capture_1(ptr %E) {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @test_malloc_capture_1(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define ptr @test_malloc_capture_2() {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @test_malloc_capture_2(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define ptr @test_malloc_capture_3() {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @test_malloc_capture_3(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define ptr @test_malloc_capture_4() {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @test_malloc_capture_4(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define ptr @test_malloc_capture_5() {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @test_malloc_capture_5(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define ptr @test_malloc_capture_6() {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @test_malloc_capture_6(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define ptr @test_malloc_capture_7() {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @test_malloc_capture_7(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define void @malloc_capture_throw_1() {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @malloc_capture_throw_1(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 1)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %call = call ptr @malloc(i64 1)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:define void @malloc_capture_throw_2() {
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-LABEL: @malloc_capture_throw_2(
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 1)
+test/Transforms/DeadStoreElimination/multiblock-captures.ll:  %call = call ptr @malloc(i64 1)
+test/Transforms/DeadStoreElimination/debuginfo.ll:declare noalias ptr @malloc(i32)
+test/Transforms/DeadStoreElimination/debuginfo.ll:; CHECK-NEXT: malloc
+test/Transforms/DeadStoreElimination/debuginfo.ll:  %p = tail call ptr @malloc(i32 4)
+test/Transforms/DeadStoreElimination/pr11390.ll:  %call4 = tail call noalias ptr @malloc(i64 %add3) nounwind
+test/Transforms/DeadStoreElimination/pr11390.ll:declare noalias ptr @malloc(i64) nounwind
+test/Transforms/DeadStoreElimination/fence-todo.ll:declare noalias ptr @malloc(i32)
+test/Transforms/DeadStoreElimination/fence-todo.ll:; CHECK: malloc
+test/Transforms/DeadStoreElimination/fence-todo.ll:  %m  =  call ptr @malloc(i32 24)
+test/Transforms/DeadStoreElimination/simple.ll:define ptr @test_malloc_no_escape_before_return() {
+test/Transforms/DeadStoreElimination/simple.ll:; CHECK-LABEL: @test_malloc_no_escape_before_return(
+test/Transforms/DeadStoreElimination/simple.ll:; CHECK-NEXT:    [[PTR:%.*]] = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/simple.ll:  %ptr = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/simple.ll:define ptr @test_custom_malloc_no_escape_before_return() {
+test/Transforms/DeadStoreElimination/simple.ll:; CHECK-LABEL: @test_custom_malloc_no_escape_before_return(
+test/Transforms/DeadStoreElimination/simple.ll:; CHECK-NEXT:    [[PTR:%.*]] = tail call ptr @custom_malloc(i32 4)
+test/Transforms/DeadStoreElimination/simple.ll:  %ptr = tail call ptr @custom_malloc(i32 4)
+test/Transforms/DeadStoreElimination/simple.ll:; CHECK-NEXT:    [[P:%.*]] = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/simple.ll:  %p = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/simple.ll:declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized")
+test/Transforms/DeadStoreElimination/simple.ll:declare noalias ptr @custom_malloc(i32) willreturn
+test/Transforms/DeadStoreElimination/simple.ll:define void @malloc_no_escape() {
+test/Transforms/DeadStoreElimination/simple.ll:; CHECK-LABEL: @malloc_no_escape(
+test/Transforms/DeadStoreElimination/simple.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/DeadStoreElimination/simple.ll:define void @custom_malloc_no_escape() {
+test/Transforms/DeadStoreElimination/simple.ll:; CHECK-LABEL: @custom_malloc_no_escape(
+test/Transforms/DeadStoreElimination/simple.ll:; CHECK-NEXT:    [[M:%.*]] = call ptr @custom_malloc(i32 24)
+test/Transforms/DeadStoreElimination/simple.ll:  %m = call ptr @custom_malloc(i32 24)
+test/Transforms/DeadStoreElimination/simple.ll:; Check another case like PR13547 where strdup is not like malloc.
+test/Transforms/DeadStoreElimination/store-after-loop.ll:; CHECK-NEXT:    [[LIST_NEW_I8_PTR]] = tail call align 8 dereferenceable_or_null(8) ptr @malloc(i32 8)
+test/Transforms/DeadStoreElimination/store-after-loop.ll:  %list.new.i8.ptr = tail call align 8 dereferenceable_or_null(8) ptr @malloc(i32 8)
+test/Transforms/DeadStoreElimination/store-after-loop.ll:declare noalias noundef align 8 ptr @malloc(i32 noundef) local_unnamed_addr #0
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:define ptr @foo_with_removable_malloc() {
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:; CHECK-LABEL: define ptr @foo_with_removable_malloc() {
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:  %m2 = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:  %m1 = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:define ptr @foo_with_removable_malloc_free() {
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:; CHECK-LABEL: define ptr @foo_with_removable_malloc_free() {
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:; CHECK-NEXT:    [[M1:%.*]] = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:; CHECK-NEXT:    [[M2:%.*]] = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:  %m1 = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:  %m2 = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:define ptr @foo_with_malloc_to_calloc() {
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:; CHECK-LABEL: define ptr @foo_with_malloc_to_calloc() {
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:  %m1 = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:  %m2 = tail call ptr @malloc(i64 4)
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc"
+test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll:declare void @free(ptr nocapture) allockind("free") "alloc-family"="malloc"
+test/Transforms/GVN/malloc-load-removal.ll:declare noalias ptr @malloc(i64) nounwind allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/GVN/malloc-load-removal.ll:  %call = tail call ptr @malloc(i64 100) nounwind
+test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll:; Test to make sure malloc's bitcast does not block detection of a store 
+test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll:  %1 = tail call ptr @malloc(i64 %mul)
+test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GVN/nonescaping-malloc.ll:; alias the malloc'd value %tmp.i20.i.i, which it can do since %tmp7.i
+test/Transforms/GVN/nonescaping-malloc.ll:declare noalias ptr @malloc(i64) nounwind allockind("alloc,uninitialized") allocsize(0) inaccessiblememonly
+test/Transforms/GVN/nonescaping-malloc.ll:  %tmp.i20.i.i = tail call noalias ptr @malloc(i64 %tmp8.i.i) nounwind
+test/Transforms/GVN/PRE/pre-after-rle.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GVN/PRE/pre-after-rle.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024)
+test/Transforms/GVN/PRE/pre-after-rle.ll:  %call = tail call noalias ptr @malloc(i64 1024)
+test/Transforms/GVN/PRE/pre-after-rle.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024)
+test/Transforms/GVN/PRE/pre-after-rle.ll:  %call = tail call noalias ptr @malloc(i64 1024)
+test/Transforms/GVN/nonescaping.ll:declare noalias ptr @malloc(i64) nounwind allockind("alloc,uninitialized") allocsize(0) inaccessiblememonly
+test/Transforms/GVN/nonescaping.ll:define i8 @test_malloc(ptr %p) {
+test/Transforms/GVN/nonescaping.ll:; MDEP-LABEL: @test_malloc(
+test/Transforms/GVN/nonescaping.ll:; MDEP-NEXT:    [[OBJ:%.*]] = call ptr @malloc(i64 16)
+test/Transforms/GVN/nonescaping.ll:; MSSA-LABEL: @test_malloc(
+test/Transforms/GVN/nonescaping.ll:; MSSA-NEXT:    [[OBJ:%.*]] = call ptr @malloc(i64 16)
+test/Transforms/GVN/nonescaping.ll:  %obj = call ptr @malloc(i64 16)
+test/Transforms/GVN/setjmp.ll:declare ptr @malloc(i64)
+test/Transforms/GVN/setjmp.ll:; CHECK-NEXT:    [[MALLOC:%.*]] = call noalias ptr @malloc(i64 4)
+test/Transforms/GVN/setjmp.ll:  %malloc = call noalias ptr @malloc(i64 4)
+test/Transforms/GVN/setjmp.ll:  store i32 10, ptr %malloc, align 4
+test/Transforms/GVN/setjmp.ll:  store i32 20, ptr %malloc
+test/Transforms/GVN/setjmp.ll:  %res = load i32, ptr %malloc
+test/Transforms/GlobalOpt/malloc-promote-addrspacecast.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/GlobalOpt/malloc-promote-addrspacecast.ll:  %malloccall = tail call ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/malloc-promote-addrspacecast.ll:  store ptr %malloccall, ptr @G
+test/Transforms/GlobalOpt/malloc-promote-1-no-null-opt.ll:; CHECK-NEXT:    [[MALLOCCALL:%.*]] = tail call ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/malloc-promote-1-no-null-opt.ll:  %malloccall = tail call ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/malloc-promote-1-no-null-opt.ll:  store ptr %malloccall, ptr @G
+test/Transforms/GlobalOpt/malloc-promote-1-no-null-opt.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/heap-sra-phi.ll:	%malloccall = tail call ptr @malloc(i64 8000000) ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-phi.ll:	%.sub = getelementptr [1000000 x %struct.foo], ptr %malloccall, i32 0, i32 0		; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-phi.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll:; No malloc promotion with non-null check.
+test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll:; CHECK-NEXT:    [[I3:%.*]] = call noalias ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll:  %i3 = call noalias ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/null-check-not-use-pr35760.ll:declare dso_local noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll:; Test ensures that non-optimizable array mallocs are not optimized; specifically
+test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll:; GlobalOpt was treating a non-optimizable array malloc as a non-array malloc
+test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll:; and optimizing the global object that the malloc was stored to as a single
+test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll:  %4 = call noalias ptr @malloc(i64 %3) nounwind  ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll:; CHECK: call noalias ptr @malloc
+test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll:declare noalias ptr @malloc(i64) nounwind
+test/Transforms/GlobalOpt/heap-sra-1-no-null-opt.ll:  %mallocsize = mul i64 %Size, 8                  ; <i64> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-1-no-null-opt.ll:  %malloccall = tail call ptr @malloc(i64 %mallocsize) ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-1-no-null-opt.ll:	store ptr %malloccall, ptr @X, align 4
+test/Transforms/GlobalOpt/heap-sra-1-no-null-opt.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll:  %malloccall = tail call ptr @malloc(i32 %trunc)
+test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll:	%.sub = getelementptr [1000000 x %struct.foo], ptr %malloccall, i32 0, i32 0		; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll:declare noalias ptr @malloc(i32)
+test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll:  %4 = tail call ptr @malloc(i64 %3)              ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll:; CHECK-NOT: call ptr @malloc(i64
+test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized")
+test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll:declare noalias ptr @malloc(i32)
+test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll:  %malloccall.i10 = call ptr @malloc(i32 16) nounwind ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll:  store ptr %malloccall.i10, ptr @fixLRBT, align 8
+test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll:  %mallocsize2 = shl i32 %0, 4                    ; <i32> [#uses=1]
+test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll:  %malloccall3 = tail call ptr @malloc(i32 %mallocsize2) nounwind ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll:  store ptr %malloccall3, ptr @Y, align 8
+test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll:declare noalias ptr @malloc(i32)
+test/Transforms/GlobalOpt/heap-sra-phi-no-null-opt.ll:	%malloccall = tail call ptr @malloc(i64 8000000) ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-phi-no-null-opt.ll:	%.sub = getelementptr [1000000 x %struct.foo], ptr %malloccall, i32 0, i32 0		; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-phi-no-null-opt.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/malloc-promote-2-no-null-opt.ll:; CHECK-NEXT:    [[MALLOCCALL:%.*]] = tail call ptr @malloc(i64 400)
+test/Transforms/GlobalOpt/malloc-promote-2-no-null-opt.ll:  %malloccall = tail call ptr @malloc(i64 400)
+test/Transforms/GlobalOpt/malloc-promote-2-no-null-opt.ll:  store ptr %malloccall, ptr @G
+test/Transforms/GlobalOpt/malloc-promote-2-no-null-opt.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/heap-sra-1.ll:  %mallocsize = mul i64 %Size, 8                  ; <i64> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-1.ll:  %malloccall = tail call ptr @malloc(i64 %mallocsize) ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-1.ll:	store ptr %malloccall, ptr @X, align 4
+test/Transforms/GlobalOpt/heap-sra-1.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/MallocSROA-section.ll:  %mallocsize2 = shl i32 %0, 4                    ; <i32> [#uses=1]
+test/Transforms/GlobalOpt/MallocSROA-section.ll:  %malloccall3 = tail call ptr @malloc(i32 %mallocsize2)  ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/MallocSROA-section.ll:  store ptr %malloccall3, ptr @Y, align 8
+test/Transforms/GlobalOpt/MallocSROA-section.ll:declare noalias ptr @malloc(i32)
+test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll:  %malloccall = tail call ptr @malloc(i32 %trunc)
+test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll:	%.sub = getelementptr [1000000 x %struct.foo], ptr %malloccall, i32 0, i32 0		; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll:declare noalias ptr @malloc(i32)
+test/Transforms/GlobalOpt/heap-sra-2-no-null-opt.ll:	%malloccall = tail call ptr @malloc(i64 8000000) ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-2-no-null-opt.ll:	%.sub = getelementptr [1000000 x %struct.foo], ptr %malloccall, i32 0, i32 0		; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-2-no-null-opt.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/MallocSROA-section-no-null-opt.ll:; CHECK: call ptr @malloc
+test/Transforms/GlobalOpt/MallocSROA-section-no-null-opt.ll:  %mallocsize2 = shl i32 %0, 4                    ; <i32> [#uses=1]
+test/Transforms/GlobalOpt/MallocSROA-section-no-null-opt.ll:  %malloccall3 = tail call ptr @malloc(i32 %mallocsize2)  ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/MallocSROA-section-no-null-opt.ll:  store ptr %malloccall3, ptr @Y, align 8
+test/Transforms/GlobalOpt/MallocSROA-section-no-null-opt.ll:declare noalias ptr @malloc(i32)
+test/Transforms/GlobalOpt/heap-sra-2.ll:	%malloccall = tail call ptr @malloc(i64 8000000) ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-2.ll:	%.sub = getelementptr [1000000 x %struct.foo], ptr %malloccall, i32 0, i32 0		; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/heap-sra-2.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/malloc-promote-addrspace.ll:  %i = call noalias ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/malloc-promote-addrspace.ll:declare dso_local noalias ptr @malloc(i64) #0
+test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll:  %m1 = call ptr @malloc(i64 8)
+test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll:  %m2 = call ptr @malloc(i64 16)
+test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll:  %m3 = call ptr @malloc(i64 8)
+test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll:; CHECK-NEXT:    [[I3:%.*]] = call noalias ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll:  %i3 = call noalias ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/null-check-is-use-pr35760.ll:declare dso_local noalias ptr @malloc(i64) #0
+test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll:  %call = call ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll:declare noalias align 16 ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/GlobalOpt/malloc-promote-5.ll:  %call = call ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/malloc-promote-5.ll:declare noalias align 16 ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	br i1 %c1, label %bb.i, label %my_malloc.exit
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:my_malloc.exit:		; preds = %entry
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	br i1 %c2, label %bb.i81, label %my_malloc.exit83
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:bb.i81:		; preds = %my_malloc.exit
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:my_malloc.exit83:		; preds = %my_malloc.exit
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:bb.i.i57:		; preds = %my_malloc.exit83
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:my_calloc.exit.i:		; preds = %my_malloc.exit83
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	br i1 %c8, label %bb.i1.i68, label %my_malloc.exit.i70
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:my_malloc.exit.i70:		; preds = %bb8.i67
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:bb9.i71:		; preds = %bb9.i71, %my_malloc.exit.i70
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:bb16.i77:		; preds = %bb9.i71, %my_malloc.exit.i70, %bb15.preheader.i
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	%.pre41.i.rle244 = phi ptr [ %.pre41.i, %bb15.preheader.i ], [ %0, %my_malloc.exit.i70 ], [ %1, %bb9.i71 ]		; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	%mallocsize = mul i64 28, undef                  ; <i64> [#uses=1]
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	%malloccall = tail call ptr @malloc(i64 %mallocsize)      ; <ptr> [#uses=1]
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	br i1 %c4, label %bb.i1.i39, label %my_malloc.exit2.i
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:my_malloc.exit2.i:		; preds = %bb1.i38
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	store ptr %malloccall, ptr @net, align 4
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:	br i1 %c5, label %bb.i7.i40, label %my_malloc.exit8.i
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:bb.i7.i40:		; preds = %my_malloc.exit2.i
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:my_malloc.exit8.i:		; preds = %my_malloc.exit2.i
+test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll:declare noalias ptr @malloc(i64)
+test/Transforms/GlobalOpt/crash.ll:  %C = call noalias ptr @malloc(i64 %B) nounwind
+test/Transforms/GlobalOpt/crash.ll:declare noalias ptr @malloc(i64) nounwind
+test/Transforms/GlobalOpt/crash.ll:  %call = call ptr @malloc(i64 %mul)
+test/Transforms/GlobalOpt/malloc-promote-2.ll:  %malloccall = tail call ptr @malloc(i64 400)
+test/Transforms/GlobalOpt/malloc-promote-2.ll:  store ptr %malloccall, ptr @G
+test/Transforms/GlobalOpt/malloc-promote-2.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/GlobalOpt/malloc-promote-1.ll:  %malloccall = tail call ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/malloc-promote-1.ll:  store ptr %malloccall, ptr @G
+test/Transforms/GlobalOpt/malloc-promote-1.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/GlobalOpt/malloc-promote-4.ll:  %1 = call noalias ptr @malloc(i64 4)
+test/Transforms/GlobalOpt/malloc-promote-4.ll:declare dso_local noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/GlobalOpt/malloc-promote-3.ll:; CHECK-NEXT:    [[MALLOCCALL:%.*]] = tail call ptr @malloc(i64 400) #[[ATTR0:[0-9]+]]
+test/Transforms/GlobalOpt/malloc-promote-3.ll:  %malloccall = tail call ptr @malloc(i64 400) nobuiltin
+test/Transforms/GlobalOpt/malloc-promote-3.ll:  store ptr %malloccall, ptr @G
+test/Transforms/GlobalOpt/malloc-promote-3.ll:declare noalias ptr @malloc(i64)
+test/Transforms/HipStdPar/allocation-no-interposition.ll:; CHECK: warning: {{.*}} malloc {{.*}} cannot be interposed, missing: __hipstdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available.
+test/Transforms/HipStdPar/allocation-no-interposition.ll:; CHECK: warning: {{.*}} __libc_malloc {{.*}} cannot be interposed, missing: __hipstdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available.
+test/Transforms/HipStdPar/allocation-no-interposition.ll:  %3 = call noalias ptr @malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-no-interposition.ll:  %6 = call noalias ptr @malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-no-interposition.ll:  %19 = call ptr @malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-no-interposition.ll:  %24 = call ptr @malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-no-interposition.ll:  %27 = call ptr @__libc_malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-no-interposition.ll:declare noalias ptr @malloc(i64 noundef)
+test/Transforms/HipStdPar/allocation-no-interposition.ll:declare ptr @__libc_malloc(i64 noundef)
+test/Transforms/HipStdPar/allocation-interposition.ll:declare ptr @__hipstdpar_malloc(i64)
+test/Transforms/HipStdPar/allocation-interposition.ll:declare ptr @__hipstdpar_hidden_malloc(i64)
+test/Transforms/HipStdPar/allocation-interposition.ll:  ; CHECK: %3 = call noalias ptr @__hipstdpar_malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  %3 = call noalias ptr @malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  ; CHECK: %6 = call noalias ptr @__hipstdpar_malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  %6 = call noalias ptr @malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  ; CHECK: %19 = call ptr @__hipstdpar_malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  %19 = call ptr @malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  ; CHECK: %24 = call ptr @__hipstdpar_malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  %24 = call ptr @malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  ; CHECK: %27 = call ptr @__hipstdpar_malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:  %27 = call ptr @__libc_malloc(i64 noundef 42)
+test/Transforms/HipStdPar/allocation-interposition.ll:declare noalias ptr @malloc(i64 noundef)
+test/Transforms/HipStdPar/allocation-interposition.ll:declare ptr @__libc_malloc(i64 noundef)
+test/Transforms/ArgumentPromotion/allocsize.ll:declare ptr @malloc(i64)
+test/Transforms/ArgumentPromotion/allocsize.ll:; CHECK-NEXT:    [[PTR:%.*]] = call ptr @malloc(i64 [[SIZE]])
+test/Transforms/ArgumentPromotion/allocsize.ll:  %ptr = call ptr @malloc(i64 %size)
+test/Transforms/ArgumentPromotion/allocsize.ll:; CHECK-NEXT:    [[PTR:%.*]] = call ptr @malloc(i64 [[SIZE]])
+test/Transforms/ArgumentPromotion/allocsize.ll:  %ptr = call ptr @malloc(i64 %size)
+test/Transforms/LoopVectorize/X86/rauw-bug.ll:; This test used to fail under libgmalloc. Because we would try to access a
+test/Transforms/LoopVectorize/X86/rauw-bug.ll:; llvm-lit -v --param use_gmalloc=1 --param
+test/Transforms/LoopVectorize/X86/rauw-bug.ll:;   gmalloc_path=/usr/lib/libgmalloc.dylib
+test/Transforms/Inline/dynamic-alloca-simplified-large.ll:declare noalias ptr @malloc(i64)
+test/Transforms/Inline/dynamic-alloca-simplified-large.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i64 [[CONV]]) #[[ATTR3]]
+test/Transforms/Inline/dynamic-alloca-simplified-large.ll:  %call = tail call ptr @malloc(i64 %conv) #3
+test/Transforms/Inline/dynamic-alloca-simplified-large.ll:; CHECK-NEXT:    [[CALL_I:%.*]] = tail call ptr @malloc(i64 [[CONV_I]]) #[[ATTR3]]
+test/Transforms/Inline/ML/state-tracking-coro.ll:declare ptr @malloc(i64)
+test/Transforms/Inline/ML/state-tracking-coro.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/SimplifyCFG/speculate-store.ll:declare noalias ptr @malloc(i64 %size)
+test/Transforms/SimplifyCFG/speculate-store.ll:define i64 @load_before_store_noescape_malloc(i64 %i, i32 %b)  {
+test/Transforms/SimplifyCFG/speculate-store.ll:; CHECK-LABEL: @load_before_store_noescape_malloc(
+test/Transforms/SimplifyCFG/speculate-store.ll:; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 8)
+test/Transforms/SimplifyCFG/speculate-store.ll:  %a = call ptr @malloc(i64 8)
+test/Transforms/Coroutines/coro-split-hidden.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-split-hidden.ll:; CHECK: call ptr @malloc
+test/Transforms/Coroutines/coro-split-hidden.ll:; CHECK-NOT: call ptr @malloc
+test/Transforms/Coroutines/coro-split-hidden.ll:; CHECK-NOT: call ptr @malloc
+test/Transforms/Coroutines/coro-split-hidden.ll:; CHECK-NOT: call ptr @malloc
+test/Transforms/Coroutines/coro-split-hidden.ll:declare noalias ptr @malloc(i32) allockind("alloc,uninitialized")
+test/Transforms/Coroutines/coro-alloca-08.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-alloca-08.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-alloca-08.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-01.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-split-01.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-musttail6.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail6.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail6.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-dbg.ll:  %call = tail call ptr @malloc(i64 %1), !dbg !26
+test/Transforms/Coroutines/coro-split-dbg.ll:declare noalias ptr @malloc(i64) local_unnamed_addr #6
+test/Transforms/Coroutines/coro-split-musttail9.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail9.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-spill-promise-02.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-spill-promise-02.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-musttail10.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail10.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-alloca-04.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-04.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/ArgAddr.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i32 24)
+test/Transforms/Coroutines/ArgAddr.ll:  %call = tail call ptr @malloc(i32 %0)
+test/Transforms/Coroutines/ArgAddr.ll:declare ptr @malloc(i32)
+test/Transforms/Coroutines/no-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/no-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/no-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/no-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/no-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/no-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/no-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/no-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/no-suspend.ll:declare ptr @malloc(i32) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/Coroutines/ex0.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/ex0.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-musttail1.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail1.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-alloca-03.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-03.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll:  %alloc = call ptr @malloc(i64 %size)
+test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll:declare noalias ptr @malloc(i64)
+test/Transforms/Coroutines/coro-padding.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-padding.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-final-suspend.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-frame.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-frame.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-alloca-05.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-05.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-alloca-02.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-02.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll:  %alloc = call ptr @malloc(i64 %size)
+test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll:declare noalias ptr @malloc(i64)
+test/Transforms/Coroutines/coro-frame-unreachable.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-frame-unreachable.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-00.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-split-00.ll:; CHECK: call ptr @malloc
+test/Transforms/Coroutines/coro-split-00.ll:; CHECK-NOT: call ptr @malloc
+test/Transforms/Coroutines/coro-split-00.ll:; CHECK-NOT: call ptr @malloc
+test/Transforms/Coroutines/coro-split-00.ll:; CHECK-NOT: call ptr @malloc
+test/Transforms/Coroutines/coro-split-00.ll:; CHECK-NOT: call ptr @malloc
+test/Transforms/Coroutines/coro-split-00.ll:declare noalias ptr @malloc(i32) allockind("alloc,uninitialized") "alloc-family"="malloc"
+test/Transforms/Coroutines/coro-split-00.ll:declare void @free(ptr) willreturn allockind("free") "alloc-family"="malloc"
+test/Transforms/Coroutines/coro-split-musttail.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-02.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-02.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-zero-alloca.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-zero-alloca.ll:  %coro.alloc = call ptr @malloc(i64 %coro.size)
+test/Transforms/Coroutines/coro-split-musttail7.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail7.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail7.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-alloca-09.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-alloca-09.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-alloca-with-addrspace.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-with-addrspace.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-retcon-unreachable.ll:; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id.retcon.once(i32 32, i32 8, ptr [[BUFFER:%.*]], ptr @prototype, ptr @malloc, ptr @free)
+test/Transforms/Coroutines/coro-retcon-unreachable.ll:  %id = call token @llvm.coro.id.retcon.once(i32 32, i32 8, ptr %buffer, ptr @prototype, ptr @malloc, ptr @free)
+test/Transforms/Coroutines/coro-retcon-unreachable.ll:declare noalias ptr @malloc(i64) #5
+test/Transforms/Coroutines/coro-debug.ll:  %call = call ptr @malloc(i64 %1), !dbg !16
+test/Transforms/Coroutines/coro-debug.ll:declare ptr @malloc(i64) #3
+test/Transforms/Coroutines/coro-split-musttail8.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail8.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/ex4.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/ex4.ll:declare ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/ex5.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/ex5.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-spill-suspend.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-spill-suspend.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-debug-spill-dbg.declare.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-debug-spill-dbg.declare.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-no-lieftime.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-split-no-lieftime.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-alloca-07.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-07.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-alloca-07.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i32 48)
+test/Transforms/Coroutines/coro-readnone.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-readnone.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-readnone-02.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-readnone-02.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-eh-00.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-musttail2.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail2.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 40)
+test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll:  %alloc = call ptr @malloc(i64 %size)
+test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-await-suspend-lower.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-await-suspend-lower.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-await-suspend-handle-in-ramp.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-await-suspend-handle-in-ramp.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-spill-promise.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-spill-promise.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll:; CHECK: call ptr @malloc(i32 1024)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-frame-arrayalloca.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-frame-arrayalloca.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-musttail5.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail5.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/ex3.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/ex3.ll:; CHECK-NOT:  ptr @malloc
+test/Transforms/Coroutines/ex3.ll:declare ptr @malloc(i32)
+test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll:  %alloc = call ptr @malloc(i64 %size)
+test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll:declare noalias ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll:; CHECK: call ptr @malloc(i32 520)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/ex1.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/ex1.ll:declare ptr @malloc(i32)
+test/Transforms/Coroutines/coro-spill-corobegin.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-spill-corobegin.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-lifetime-end.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-lifetime-end.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16)
+test/Transforms/Coroutines/coro-lifetime-end.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-lifetime-end.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16)
+test/Transforms/Coroutines/coro-lifetime-end.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-lifetime-end.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16)
+test/Transforms/Coroutines/coro-lifetime-end.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail13.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail13.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail-ppc64le.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/remarks.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/remarks.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll:; CHECK:       %alloc = call ptr @malloc(i32 32)
+test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-alloca-01.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-01.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-eh-01.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-catchswitch.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-catchswitch.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/phi-coro-end.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/phi-coro-end.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-alloca-outside-frame.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-outside-frame.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-alloca-06.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-alloca-06.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-split-musttail4.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail4.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-noinline.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-split-noinline.ll:declare noalias ptr @malloc(i32) allockind("alloc,uninitialized") "alloc-family"="malloc"
+test/Transforms/Coroutines/coro-split-noinline.ll:declare void @free(ptr) willreturn allockind("free") "alloc-family"="malloc"
+test/Transforms/Coroutines/coro-split-musttail12.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail12.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-split-musttail3.ll:  %alloc = call ptr @malloc(i64 16) #3
+test/Transforms/Coroutines/coro-split-musttail3.ll:declare ptr @malloc(i64)
+test/Transforms/Coroutines/coro-spill-after-phi.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i32 32)
+test/Transforms/Coroutines/coro-spill-after-phi.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-spill-after-phi.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-noalias-param.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-noalias-param.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Coroutines/coro-materialize.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-materialize.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-materialize.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-materialize.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-materialize.ll:  %alloc = call ptr @malloc(i32 %size)
+test/Transforms/Coroutines/coro-materialize.ll:declare noalias ptr @malloc(i32)
+test/Transforms/Attributor/value-simplify-reachability.ll:declare void @free(ptr) allockind("free") "alloc-family"="malloc"
+test/Transforms/Attributor/value-simplify-reachability.ll:declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0, 1) "alloc-family"="malloc"
+test/Transforms/Attributor/value-simplify-reachability.ll:; TUNIT: attributes #[[ATTR2:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-reachability.ll:; TUNIT: attributes #[[ATTR3:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-reachability.ll:; CGSCC: attributes #[[ATTR2:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-reachability.ll:; CGSCC: attributes #[[ATTR3:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-pointer-info.ll:define dso_local i32 @round_trip_malloc(i32 %x) {
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CHECK-LABEL: define {{[^@]+}}@round_trip_malloc
+test/Transforms/Attributor/value-simplify-pointer-info.ll:  %call = call noalias ptr @malloc(i64 4) norecurse
+test/Transforms/Attributor/value-simplify-pointer-info.ll:define dso_local i32 @round_trip_malloc_constant() {
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CHECK-LABEL: define {{[^@]+}}@round_trip_malloc_constant() {
+test/Transforms/Attributor/value-simplify-pointer-info.ll:  %call = call noalias ptr @malloc(i64 4) norecurse
+test/Transforms/Attributor/value-simplify-pointer-info.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
+test/Transforms/Attributor/value-simplify-pointer-info.ll:declare void @free(ptr) allockind("free") "alloc-family"="malloc"
+test/Transforms/Attributor/value-simplify-pointer-info.ll:define dso_local i32 @conditional_malloc(i32 %x) {
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CHECK-LABEL: define {{[^@]+}}@conditional_malloc
+test/Transforms/Attributor/value-simplify-pointer-info.ll:  %call = call noalias ptr @malloc(i64 4) norecurse
+test/Transforms/Attributor/value-simplify-pointer-info.ll:declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0, 1) "alloc-family"="malloc"
+test/Transforms/Attributor/value-simplify-pointer-info.ll:define dso_local ptr @malloc_like(i32 %s) {
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; TUNIT-LABEL: define {{[^@]+}}@malloc_like
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; TUNIT-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]]) #[[ATTR20:[0-9]+]]
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CGSCC-LABEL: define {{[^@]+}}@malloc_like
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CGSCC-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]]) #[[ATTR23:[0-9]+]]
+test/Transforms/Attributor/value-simplify-pointer-info.ll:  %call = call noalias ptr @malloc(i64 %conv) norecurse
+test/Transforms/Attributor/value-simplify-pointer-info.ll:define dso_local i32 @round_trip_malloc_like(i32 %x) {
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; TUNIT-LABEL: define {{[^@]+}}@round_trip_malloc_like
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; TUNIT-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc_like(i32 noundef 4) #[[ATTR20]]
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CGSCC-LABEL: define {{[^@]+}}@round_trip_malloc_like
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CGSCC-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc_like(i32 noundef 4) #[[ATTR23]]
+test/Transforms/Attributor/value-simplify-pointer-info.ll:  %call = call ptr @malloc_like(i32 4) norecurse
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CGSCC-NEXT:    [[CALL:%.*]] = call noalias dereferenceable_or_null(24) ptr @malloc(i64 noundef 24)
+test/Transforms/Attributor/value-simplify-pointer-info.ll:  %call = call noalias dereferenceable_or_null(24) ptr @malloc(i64 24) #4
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; TUNIT: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; TUNIT: attributes #[[ATTR9:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; TUNIT: attributes #[[ATTR10:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CGSCC: attributes #[[ATTR9:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CGSCC: attributes #[[ATTR10:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+test/Transforms/Attributor/value-simplify-pointer-info.ll:; CGSCC: attributes #[[ATTR11:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+test/Transforms/Attributor/heap_to_stack.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[M:%.*]] = tail call noalias align 16 ptr @malloc(i64 noundef [[ADD]])
+test/Transforms/Attributor/heap_to_stack.ll:  %m = tail call noalias align 16 ptr @malloc(i64 %add)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; TEST 3 - 1 malloc, 1 free
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; TEST 5 - not all exit paths have a call to free, but all uses of malloc
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; TEST 9 - FIXME: malloc should be converted.
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; TEST 10 - 1 malloc, 1 free
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:define i32 @malloc_in_loop(i32 %arg) {
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-LABEL: define {{[^@]+}}@malloc_in_loop
+test/Transforms/Attributor/heap_to_stack.ll:  %i7 = call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 256)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 256)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef -1)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 -1)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 [[S]])
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 %S)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/allocator.ll:; TODO: change malloc like call
+test/Transforms/Attributor/allocator.ll:define dso_local void @positive_malloc_1(ptr noundef %val) #0 {
+test/Transforms/Attributor/allocator.ll:; CHECK-LABEL: define dso_local void @positive_malloc_1
+test/Transforms/Attributor/allocator.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 12)
+test/Transforms/Attributor/allocator.ll:  %call = call noalias ptr @malloc(i64 noundef 12) #3
+test/Transforms/Attributor/allocator.ll:; TODO: change malloc like call
+test/Transforms/Attributor/allocator.ll:define dso_local void @positive_malloc_2(ptr noundef %val) #0 {
+test/Transforms/Attributor/allocator.ll:; CHECK-LABEL: define dso_local void @positive_malloc_2
+test/Transforms/Attributor/allocator.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 60)
+test/Transforms/Attributor/allocator.ll:  %call = call noalias ptr @malloc(i64 noundef %mul)
+test/Transforms/Attributor/allocator.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 16)
+test/Transforms/Attributor/allocator.ll:  %call = call noalias ptr @malloc(i64 noundef 16) #2
+test/Transforms/Attributor/allocator.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef [[MUL]])
+test/Transforms/Attributor/allocator.ll:  %call = call noalias ptr @malloc(i64 noundef %mul) #3
+test/Transforms/Attributor/allocator.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000)
+test/Transforms/Attributor/allocator.ll:  %call = call noalias ptr @malloc(i64 noundef 40000) #3
+test/Transforms/Attributor/allocator.ll:declare noalias ptr @malloc(i64 noundef) #1
+test/Transforms/Attributor/noalias.ll:;   void *V = malloc(4);
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %1 = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:declare noalias ptr @malloc(i64)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %1 = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %1 = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %1 = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:;   return malloc(4);
+test/Transforms/Attributor/noalias.ll:; TUNIT-NEXT:    [[TMP6:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:; CGSCC-NEXT:    [[TMP6:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %6 = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; TUNIT-NEXT:    [[A:%.*]] = call noalias ptr @malloc(i64 noundef 4) #[[ATTR2]]
+test/Transforms/Attributor/noalias.ll:; CGSCC-NEXT:    [[A:%.*]] = call noalias ptr @malloc(i64 noundef 4) #[[ATTR3]]
+test/Transforms/Attributor/noalias.ll:  %A = call noalias ptr @malloc(i64 4) nounwind
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[TMP2:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %2 = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[B:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %B = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[A:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %A = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[A:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %A = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[A:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[B:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %A = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:  %B = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[M1:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:; IS__CGSCC_OPM-NEXT:    [[M1:%.*]] = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:  %m1 = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/noalias.ll:; CHECK-NEXT:    [[M1:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/noalias.ll:  %m1 = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/nofree.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[TMP0]]) #[[ATTR2]]
+test/Transforms/Attributor/nofree.ll:  %call = tail call noalias ptr @malloc(i64 %0) #2
+test/Transforms/Attributor/nofree.ll:declare noalias ptr @malloc(i64)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; FIXME: amdgpu doesn't claim malloc is a thing, so the test is somewhat
+test/Transforms/Attributor/heap_to_stack_gpu.ll:declare noalias ptr @malloc(i64)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; TEST 3 - 1 malloc, 1 free
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; TEST 5 - not all exit paths have a call to free, but all uses of malloc
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; TEST 9 - FIXME: malloc should be converted.
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; TEST 10 - 1 malloc, 1 free
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:define i32 @malloc_in_loop(i32 %arg) {
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-LABEL: define {{[^@]+}}@malloc_in_loop
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I7:%.*]] = call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i7 = call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 256)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 256)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef -1)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 -1)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 [[S]])
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 %S)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:; CHECK-NEXT:    [[I:%.*]] = tail call noalias ptr @malloc(i64 noundef 4)
+test/Transforms/Attributor/heap_to_stack_gpu.ll:  %i = tail call noalias ptr @malloc(i64 4)
+test/Transforms/Attributor/lowerheap.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
+test/Transforms/Attributor/lowerheap.ll:declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
+test/Transforms/Attributor/lowerheap.ll:declare void @free(ptr) allockind("free") "alloc-family"="malloc"
+test/Transforms/Attributor/lowerheap.ll:  %mem = call ptr @malloc(i64 %len)
+test/Transforms/Attributor/lowerheap.ll:; CHECK: attributes #[[ATTR1:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" }
+test/Transforms/Attributor/lowerheap.ll:; CHECK: attributes #[[ATTR2:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+test/Transforms/Attributor/lowerheap.ll:; CHECK: attributes #[[ATTR3:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+test/Transforms/Attributor/memory_locations.ll:declare noalias ptr @malloc(i64) inaccessiblememonly
+test/Transforms/Attributor/memory_locations.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]])
+test/Transforms/Attributor/memory_locations.ll:  %call = call ptr @malloc(i64 %conv)
+test/Transforms/Attributor/memory_locations.ll:; CHECK-NEXT:    [[CALL1:%.*]] = call noalias ptr @malloc(i64 [[CONV]])
+test/Transforms/Attributor/memory_locations.ll:  %call1 = call ptr @malloc(i64 %conv)
+test/Transforms/Attributor/memory_locations.ll:; CHECK-NEXT:    [[CALL1:%.*]] = call noalias ptr @malloc(i64 [[CONV]])
+test/Transforms/Attributor/memory_locations.ll:  %call1 = call ptr @malloc(i64 %conv)
+test/Transforms/Attributor/memory_locations.ll:define dso_local ptr @internal_only_rec_static_helper_malloc_noescape(i32 %arg) {
+test/Transforms/Attributor/memory_locations.ll:; FIXME: This is actually inaccessiblememonly because the malloced memory does not escape
+test/Transforms/Attributor/memory_locations.ll:; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static_helper_malloc_noescape
+test/Transforms/Attributor/memory_locations.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @internal_only_rec_static_malloc_noescape(i32 [[ARG]])
+test/Transforms/Attributor/memory_locations.ll:  %call = call ptr @internal_only_rec_static_malloc_noescape(i32 %arg)
+test/Transforms/Attributor/memory_locations.ll:define internal ptr @internal_only_rec_static_malloc_noescape(i32 %arg) {
+test/Transforms/Attributor/memory_locations.ll:; FIXME: This is actually inaccessiblememonly because the malloced memory does not escape
+test/Transforms/Attributor/memory_locations.ll:; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static_malloc_noescape
+test/Transforms/Attributor/memory_locations.ll:; CHECK-NEXT:    [[CALL1:%.*]] = call noalias ptr @malloc(i64 [[CONV]])
+test/Transforms/Attributor/memory_locations.ll:  %call1 = call ptr @malloc(i64 %conv)
+test/Transforms/Attributor/memory_locations.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]])
+test/Transforms/Attributor/memory_locations.ll:  %call = call ptr @malloc(i64 %conv)
+test/Transforms/Attributor/memory_locations.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias noundef dereferenceable_or_null(10) ptr @malloc(i64 noundef 10)
+test/Transforms/Attributor/memory_locations.ll:  %call = call dereferenceable_or_null(10) ptr @malloc(i64 10)
+test/Transforms/Attributor/memory_locations.ll:; CHECK-NEXT:    [[CALL4:%.*]] = call noalias ptr @malloc(i64 [[CONV]])
+test/Transforms/Attributor/memory_locations.ll:  %call4 = call ptr @malloc(i64 %conv)
+test/Transforms/Attributor/multiple-offsets-pointer-info.ll:declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
+test/Transforms/Attributor/multiple-offsets-pointer-info.ll:; CHECK: attributes #[[ATTR0:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+test/Transforms/Attributor/liveness.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 noundef 8)
+test/Transforms/Attributor/liveness.ll:  %call = call ptr @malloc(i64 8)
+test/Transforms/Attributor/liveness.ll:declare noalias ptr @malloc(i64)
+test/Transforms/LoopIdiom/basic.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[MUL]])
+test/Transforms/LoopIdiom/basic.ll:  %call = tail call noalias ptr @malloc(i64 %mul)
+test/Transforms/LoopIdiom/basic.ll:declare noalias ptr @malloc(i64)
+test/Transforms/InferFunctionAttrs/no-proto.ll:; CHECK: declare void @malloc(...)
+test/Transforms/InferFunctionAttrs/no-proto.ll:declare void @malloc(...)
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK: declare noalias noundef ptr @malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC:#[0-9]+]]
+test/Transforms/InferFunctionAttrs/annotate.ll:declare ptr @malloc(i64)
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-AIX: declare noalias noundef ptr @vec_malloc(i64 noundef) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC:#[0-9]+]]
+test/Transforms/InferFunctionAttrs/annotate.ll:declare ptr @vec_malloc(i64)
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE1_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized,aligned") allocsize(1) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCZEROED_ALLOCSIZE01_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_FREE_FAMILY_MALLOC]] = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE1_FAMILY_MALLOC]] = { mustprogress nounwind willreturn allockind("realloc") allocsize(1) memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE12_FAMILY_MALLOC]] = { mustprogress nounwind willreturn allockind("realloc") allocsize(1,2) memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="vec_malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_VEC_MALLOC]] = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="vec_malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_VEC_MALLOC]] = { mustprogress nounwind willreturn allockind("realloc") allocsize(1) memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="vec_malloc" }
+test/Transforms/InferFunctionAttrs/annotate.ll:; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_VEC_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) memory(inaccessiblemem: readwrite) "alloc-family"="vec_malloc" }
+test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll:define i32 @using_malloc() {
+test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll:; CHECK-LABEL: define noundef i32 @using_malloc
+test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll:  %alloc = call dereferenceable_or_null(64) ptr @malloc(i64 64)
+test/Transforms/PhaseOrdering/gep-null-compare-in-loop.ll:declare ptr @malloc(i64)
+test/Transforms/PhaseOrdering/basic.ll:declare ptr @malloc(i64)
+test/Transforms/PhaseOrdering/basic.ll:  %call = call ptr @malloc(i64 1)
+test/Transforms/FunctionAttrs/nofree.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[TMP0]]) #[[ATTR2]]
+test/Transforms/FunctionAttrs/nofree.ll:  %call = tail call noalias ptr @malloc(i64 %0) #2
+test/Transforms/FunctionAttrs/nofree.ll:declare noalias ptr @malloc(i64) local_unnamed_addr #2
+test/Transforms/FunctionAttrs/nofree-attributor.ll:declare noalias ptr @malloc(i64)
+test/Transforms/LICM/promote-tls.ll:declare ptr @malloc(i64)
+test/Transforms/LICM/promote-tls.ll:; CHECK-NEXT:    [[MEM:%.*]] = call noalias dereferenceable(16) ptr @malloc(i64 16)
+test/Transforms/LICM/promote-tls.ll:  %mem = call dereferenceable(16) noalias ptr @malloc(i64 16)
+test/Transforms/LICM/promote-tls.ll:declare noalias ptr @custom_malloc(i64)
+test/Transforms/LICM/promote-tls.ll:define ptr @test_custom_malloc(i32 %n) {
+test/Transforms/LICM/promote-tls.ll:; CHECK-LABEL: @test_custom_malloc(
+test/Transforms/LICM/promote-tls.ll:; CHECK-NEXT:    [[MEM:%.*]] = call noalias dereferenceable(16) ptr @custom_malloc(i64 16)
+test/Transforms/LICM/promote-tls.ll:  %mem = call dereferenceable(16) noalias ptr @custom_malloc(i64 16)
+test/Transforms/LICM/promote-tls.ll:declare ptr @not_malloc(i64)
+test/Transforms/LICM/promote-tls.ll:define ptr @test_neg_not_malloc(i32 %n) {
+test/Transforms/LICM/promote-tls.ll:; CHECK-LABEL: @test_neg_not_malloc(
+test/Transforms/LICM/promote-tls.ll:; CHECK-NEXT:    [[MEM:%.*]] = call dereferenceable(16) ptr @not_malloc(i64 16)
+test/Transforms/LICM/promote-tls.ll:  %mem = call dereferenceable(16) ptr @not_malloc(i64 16)
+test/Transforms/LICM/promote-tls.ll:; CHECK-NEXT:    [[MEM:%.*]] = call noalias dereferenceable(16) ptr @malloc(i64 16)
+test/Transforms/LICM/promote-tls.ll:  %mem = call dereferenceable(16) noalias ptr @malloc(i64 16)
+test/Transforms/LICM/scalar-promote-unwind.ll:; The malloc'ed memory is not capture and therefore promoted.
+test/Transforms/LICM/scalar-promote-unwind.ll:define void @malloc_no_capture() #0 personality ptr @__gxx_personality_v0 {
+test/Transforms/LICM/scalar-promote-unwind.ll:; CHECK-LABEL: @malloc_no_capture(
+test/Transforms/LICM/scalar-promote-unwind.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 4)
+test/Transforms/LICM/scalar-promote-unwind.ll:  %call = call ptr @malloc(i64 4)
+test/Transforms/LICM/scalar-promote-unwind.ll:; The malloc'ed memory can be captured and therefore only loads can be promoted.
+test/Transforms/LICM/scalar-promote-unwind.ll:define void @malloc_capture(ptr noalias %A) personality ptr @__gxx_personality_v0 {
+test/Transforms/LICM/scalar-promote-unwind.ll:; CHECK-LABEL: @malloc_capture(
+test/Transforms/LICM/scalar-promote-unwind.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 4)
+test/Transforms/LICM/scalar-promote-unwind.ll:  %call = call ptr @malloc(i64 4)
+test/Transforms/LICM/scalar-promote-unwind.ll:declare noalias ptr @malloc(i64)
+test/Transforms/LICM/scalar-promote-unwind.ll:; This function should only be used to test malloc_capture.
+test/Transforms/LICM/hoist-alloc.ll:declare noalias ptr @malloc(i64)
+test/Transforms/LICM/hoist-alloc.ll:define i8 @test_sink_malloc() {
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-LABEL: @test_sink_malloc(
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-NEXT:    [[A_RAW:%.*]] = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:  %a.raw = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:define i8 @test_hoist_malloc() {
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-LABEL: @test_hoist_malloc(
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-NEXT:    [[A_RAW:%.*]] = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:  %a.raw = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:define i8 @test_hoist_malloc_leak() nofree nosync {
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-LABEL: @test_hoist_malloc_leak(
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-NEXT:    [[A_RAW:%.*]] = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:  %a.raw = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:define void @test_hoist_malloc_cond_free(i1 %c) {
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-LABEL: @test_hoist_malloc_cond_free(
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-NEXT:    [[A_RAW:%.*]] = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:  %a.raw = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:define i8 @test_sink_malloc_cond_free(i1 %c) {
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-LABEL: @test_sink_malloc_cond_free(
+test/Transforms/LICM/hoist-alloc.ll:; CHECK-NEXT:    [[A_RAW:%.*]] = call nonnull ptr @malloc(i64 32)
+test/Transforms/LICM/hoist-alloc.ll:  %a.raw = call nonnull ptr @malloc(i64 32)
+test/Transforms/InstCombine/getelementptr.ll:declare noalias ptr @malloc(i64) nounwind allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/InstCombine/getelementptr.ll:define i32 @test_gep_bitcast_malloc(ptr %a) {
+test/Transforms/InstCombine/getelementptr.ll:; CHECK-LABEL: @test_gep_bitcast_malloc(
+test/Transforms/InstCombine/getelementptr.ll:; CHECK-NEXT:    [[CALL:%.*]] = call noalias dereferenceable_or_null(16) ptr @malloc(i64 16)
+test/Transforms/InstCombine/getelementptr.ll:  %call = call noalias ptr @malloc(i64 16) #2
+test/Transforms/InstCombine/builtin-object-size-custom-dl.ll:  %ptr = call ptr @malloc(i64 %sz)
+test/Transforms/InstCombine/builtin-object-size-custom-dl.ll:declare ptr @malloc(i64)
+test/Transforms/InstCombine/malloc-free-delete-dbginvar.ll:; This is a regression test for a function taken from malloc-free-delete.ll.
+test/Transforms/InstCombine/memset-1.ll:declare noalias ptr @malloc(i32) #1
+test/Transforms/InstCombine/memset-1.ll:; CHECK-NEXT:    [[CALL1:%.*]] = call ptr @malloc(i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]]
+test/Transforms/InstCombine/memset-1.ll:  %call1 = call ptr @malloc(i32 %size) #1
+test/Transforms/InstCombine/memset-1.ll:; Notice that malloc + memset pattern is now handled by DSE in a more general way.
+test/Transforms/InstCombine/memset-1.ll:define ptr @malloc_and_memset_intrinsic(i32 %n) #0 {
+test/Transforms/InstCombine/memset-1.ll:; CHECK-LABEL: @malloc_and_memset_intrinsic(
+test/Transforms/InstCombine/memset-1.ll:; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i32 [[N:%.*]])
+test/Transforms/InstCombine/memset-1.ll:  %call = call ptr @malloc(i32 %n)
+test/Transforms/InstCombine/memset-1.ll:; Notice that malloc + memset pattern is now handled by DSE in a more general way.
+test/Transforms/InstCombine/memset-1.ll:define ptr @notmalloc_memset(i32 %size, ptr %notmalloc) {
+test/Transforms/InstCombine/memset-1.ll:; CHECK-LABEL: @notmalloc_memset(
+test/Transforms/InstCombine/memset-1.ll:  %call1 = call ptr %notmalloc(i32 %size) #1
+test/Transforms/InstCombine/memset-1.ll:; This doesn't fire currently because the malloc has more than one use.
+test/Transforms/InstCombine/memset-1.ll:; Notice that malloc + memset pattern is now handled by DSE in a more general way.
+test/Transforms/InstCombine/memset-1.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i32 [[SIZE:%.*]]) #[[ATTR0]]
+test/Transforms/InstCombine/memset-1.ll:  %call = tail call ptr @malloc(i32 %size) #1
+test/Transforms/InstCombine/memset-1.ll:; CHECK-NEXT:    [[PTR:%.*]] = tail call ptr @malloc(i32 [[SIZE:%.*]]) #[[ATTR0]]
+test/Transforms/InstCombine/memset-1.ll:  %ptr = tail call ptr @malloc(i32 %size) #1
+test/Transforms/InstCombine/InferAlignAttribute.ll:; with _mm_malloc which calls posix_memalign.
+test/Transforms/InstCombine/malloc-free.ll:  %malloc_206 = tail call ptr @malloc(i32 %mul)
+test/Transforms/InstCombine/malloc-free.ll:  store ptr %malloc_206, ptr %c_19
+test/Transforms/InstCombine/malloc-free.ll:declare noalias ptr @calloc(i32, i32) nounwind allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc-free.ll:declare noalias ptr @malloc(i32) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc-free.ll:declare noalias ptr @aligned_alloc(i32, i32) allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc-free.ll:declare noalias ptr @other_aligned_alloc(i32, i32) allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc-free.ll:declare void @free(ptr) allockind("free") "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc-free.ll:  %m = call ptr @malloc(i32 1)
+test/Transforms/InstCombine/malloc-free.ll:  %a = call noalias ptr @malloc(i32 10)
+test/Transforms/InstCombine/malloc-free.ll:  %A = call ptr @malloc(i32 16000)
+test/Transforms/InstCombine/malloc-free.ll:; CHECK-NEXT:    [[A:%.*]] = call dereferenceable_or_null(700) ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:; CHECK-NEXT:    [[B:%.*]] = call dereferenceable_or_null(700) ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:; CHECK-NEXT:    [[C:%.*]] = call dereferenceable_or_null(700) ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:; CHECK-NEXT:    [[D:%.*]] = call dereferenceable_or_null(700) ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:; CHECK-NEXT:    [[E:%.*]] = call dereferenceable_or_null(700) ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:; CHECK-NEXT:    [[F:%.*]] = call dereferenceable_or_null(700) ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:; CHECK-NEXT:    [[G:%.*]] = call dereferenceable_or_null(700) ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:  %a = call ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:  %b = call ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:  %c = call ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:  %d = call ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:  %e = call ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:  %f = call ptr @malloc(i32 700)
+test/Transforms/InstCombine/malloc-free.ll:  %g = call ptr @malloc(i32 700)
+test/Transforms/InstCombine/objsize-64.ll:declare noalias ptr @malloc(i64) nounwind allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/InstCombine/objsize-64.ll:; CHECK-NEXT:    [[CALL:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/objsize-64.ll:  %call = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/alloc-realloc-free.ll:  %1 = tail call noalias align 16 dereferenceable_or_null(4) ptr @malloc(i64 4) #4
+test/Transforms/InstCombine/alloc-realloc-free.ll:declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #1
+test/Transforms/InstCombine/alloc-realloc-free.ll:attributes #1 = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") "alloc-family"="malloc" }
+test/Transforms/InstCombine/alloc-realloc-free.ll:attributes #2 = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("realloc") "alloc-family"="malloc" }
+test/Transforms/InstCombine/alloc-realloc-free.ll:attributes #3 = { nofree nosync nounwind readnone speculatable willreturn allockind("free") "alloc-family"="malloc" }
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:declare ptr @malloc(i64) allockind("alloc,uninitialized") "alloc-family"="malloc" allocsize(0)
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:declare void @free(ptr) allockind("free") "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:; Ensure the nvptx backend states malloc & free are a thing so we can recognize
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:; so we will optimize them properly. In the test below the malloc-free chain is
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:; useless and we can remove it *if* we know about malloc & free.
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:define void @malloc_then_free_not_needed() {
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:; CHECK-LABEL: @malloc_then_free_not_needed(
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:  %a = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:define void @malloc_then_free_needed() {
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:; CHECK-LABEL: @malloc_then_free_needed(
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:; CHECK-NEXT:    [[A:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/malloc_free_delete_nvptx.ll:  %a = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/objsize.ll:; CHECK-NEXT:    [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) ptr @malloc(i32 20) #[[ATTR0]]
+test/Transforms/InstCombine/objsize.ll:  %0 = tail call noalias ptr @malloc(i32 20) nounwind
+test/Transforms/InstCombine/objsize.ll:; CHECK-NEXT:    [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) ptr @malloc(i32 20) #[[ATTR0]]
+test/Transforms/InstCombine/objsize.ll:  %0 = tail call noalias ptr @malloc(i32 20) nounwind
+test/Transforms/InstCombine/objsize.ll:declare noalias ptr @malloc(i32) nounwind allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/InstCombine/objsize.ll:; CHECK-NEXT:    [[ALLOC:%.*]] = call noalias dereferenceable_or_null(48) ptr @malloc(i32 48) #[[ATTR0]]
+test/Transforms/InstCombine/objsize.ll:  %alloc = call noalias ptr @malloc(i32 48) nounwind
+test/Transforms/InstCombine/malloc-free-mismatched.ll:declare dso_local void @free(ptr) allockind("free") "alloc-family"="malloc"
+test/Transforms/InstCombine/realloc.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized")
+test/Transforms/InstCombine/realloc.ll:; CHECK-NEXT:    [[MALLOC:%.*]] = call dereferenceable_or_null(100) ptr @malloc(i64 100)
+test/Transforms/InstCombine/builtin-object-size-strdup-family.ll:declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr
+test/Transforms/InstCombine/allocsize.ll:declare ptr @my_malloc(ptr, i32) allocsize(1)
+test/Transforms/InstCombine/allocsize.ll:define void @test_malloc(ptr %p, ptr %r) {
+test/Transforms/InstCombine/allocsize.ll:; CHECK-LABEL: define void @test_malloc(
+test/Transforms/InstCombine/allocsize.ll:; CHECK-NEXT:    [[TMP1:%.*]] = call dereferenceable_or_null(100) ptr @my_malloc(ptr null, i32 100)
+test/Transforms/InstCombine/allocsize.ll:  %1 = call ptr @my_malloc(ptr null, i32 100)
+test/Transforms/InstCombine/allocsize.ll:define void @test_malloc_fails(ptr %p, ptr %r, i32 %n) {
+test/Transforms/InstCombine/allocsize.ll:; CHECK-LABEL: define void @test_malloc_fails(
+test/Transforms/InstCombine/allocsize.ll:; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @my_malloc(ptr null, i32 [[N]])
+test/Transforms/InstCombine/allocsize.ll:  %1 = call ptr @my_malloc(ptr null, i32 %n)
+test/Transforms/InstCombine/allocsize.ll:declare ptr @my_malloc_outofline(ptr, i32) #0
+test/Transforms/InstCombine/allocsize.ll:; CHECK-NEXT:    [[TMP1:%.*]] = call dereferenceable_or_null(100) ptr @my_malloc_outofline(ptr null, i32 100)
+test/Transforms/InstCombine/allocsize.ll:  %1 = call ptr @my_malloc_outofline(ptr null, i32 100)
+test/Transforms/InstCombine/allocsize.ll:declare ptr @my_malloc_i64(ptr, i64) #0
+test/Transforms/InstCombine/allocsize.ll:; CHECK-NEXT:    [[BIG_MALLOC_I64:%.*]] = call dereferenceable_or_null(8589934592) ptr @my_malloc_i64(ptr null, i64 8589934592)
+test/Transforms/InstCombine/allocsize.ll:  %big_malloc = call ptr @my_calloc(ptr null, ptr null, i32 2147483649, i32 2)
+test/Transforms/InstCombine/allocsize.ll:  store ptr %big_malloc, ptr %p, align 8
+test/Transforms/InstCombine/allocsize.ll:  %1 = call i32 @llvm.objectsize.i32.p0(ptr %big_malloc, i1 false)
+test/Transforms/InstCombine/allocsize.ll:  %big_little_malloc = call ptr @my_tiny_calloc(ptr null, ptr null, i8 127, i8 4)
+test/Transforms/InstCombine/allocsize.ll:  store ptr %big_little_malloc, ptr %p, align 8
+test/Transforms/InstCombine/allocsize.ll:  %2 = call i32 @llvm.objectsize.i32.p0(ptr %big_little_malloc, i1 false)
+test/Transforms/InstCombine/allocsize.ll:  ; malloc(2**33)
+test/Transforms/InstCombine/allocsize.ll:  %big_malloc_i64 = call ptr @my_malloc_i64(ptr null, i64 8589934592)
+test/Transforms/InstCombine/allocsize.ll:  store ptr %big_malloc_i64, ptr %p, align 8
+test/Transforms/InstCombine/allocsize.ll:  %3 = call i32 @llvm.objectsize.i32.p0(ptr %big_malloc_i64, i1 false)
+test/Transforms/InstCombine/allocsize.ll:  %4 = call i64 @llvm.objectsize.i64.p0(ptr %big_malloc_i64, i1 false)
+test/Transforms/InstCombine/allocsize.ll:; CHECK-NEXT:    [[TMP1:%.*]] = call dereferenceable_or_null(100) ptr @my_malloc(ptr null, i32 100) #[[ATTR3:[0-9]+]]
+test/Transforms/InstCombine/allocsize.ll:  %1 = call ptr @my_malloc(ptr null, i32 100) nobuiltin
+test/Transforms/InstCombine/memset_chk-1.ll:; FIXME: memset(malloc(x), 0, x) -> calloc(1, x)
+test/Transforms/InstCombine/memset_chk-1.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR3:[0-9]+]]
+test/Transforms/InstCombine/memset_chk-1.ll:  %call = tail call ptr @malloc(i64 %size) #1
+test/Transforms/InstCombine/memset_chk-1.ll:declare noalias ptr @malloc(i64) #1
+test/Transforms/InstCombine/allocsize-32.ll:declare ptr @my_malloc(ptr, i64) allocsize(1)
+test/Transforms/InstCombine/allocsize-32.ll:define void @test_malloc(ptr %p, ptr %r) {
+test/Transforms/InstCombine/allocsize-32.ll:  %1 = call ptr @my_malloc(ptr null, i64 100)
+test/Transforms/InstCombine/allocsize-32.ll:  %3 = call ptr @my_malloc(ptr null, i64 5000000000)
+test/Transforms/InstCombine/2008-05-08-StrLenSink.ll:	%tmp1 = call ptr @malloc( i32 10 ) nounwind 		; <ptr> [#uses=5]
+test/Transforms/InstCombine/2008-05-08-StrLenSink.ll:declare ptr @malloc(i32) nounwind 
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:  %call = tail call ptr @malloc(i64 %sz)
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:  %first_call = call ptr @malloc(i64 10)
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:  %second_call = call ptr @malloc(i64 30)
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:  %ptr = call ptr @malloc(i64 %sz)
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:; CHECK-NEXT:    [[OBJ:%.*]] = call ptr @malloc(i64 [[ALLOC]])
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:  %obj = call ptr @malloc(i64 %alloc)
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:; CHECK-NEXT:    [[OBJ:%.*]] = call ptr @malloc(i64 [[SZ]])
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:  %obj = call ptr @malloc(i64 %sz)
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:  %p0 = tail call ptr @malloc(i64 64)
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:  %p0 = tail call ptr @malloc(i64 64)
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:declare ptr @malloc(i64) nounwind allocsize(0) allockind("alloc,uninitialized") "alloc-family"="malloc"
+test/Transforms/InstCombine/builtin-dynamic-object-size.ll:declare void @free(ptr nocapture) nounwind allockind("free") "alloc-family"="malloc"
+test/Transforms/InstCombine/deref-alloc-fns.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
+test/Transforms/InstCombine/deref-alloc-fns.ll:declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
+test/Transforms/InstCombine/deref-alloc-fns.ll:declare noalias ptr @realloc(ptr nocapture, i64) allockind("realloc") allocsize(1) "alloc-family"="malloc"
+test/Transforms/InstCombine/deref-alloc-fns.ll:declare noalias ptr @aligned_alloc(i64 allocalign, i64) allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
+test/Transforms/InstCombine/deref-alloc-fns.ll:declare ptr @my_malloc(i64) allocsize(0)
+test/Transforms/InstCombine/deref-alloc-fns.ll:define noalias ptr @malloc_nonconstant_size(i64 %n) {
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-LABEL: @malloc_nonconstant_size(
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[N:%.*]])
+test/Transforms/InstCombine/deref-alloc-fns.ll:  %call = tail call noalias ptr @malloc(i64 %n)
+test/Transforms/InstCombine/deref-alloc-fns.ll:define noalias ptr @malloc_constant_size() {
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-LABEL: @malloc_constant_size(
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias dereferenceable_or_null(40) ptr @malloc(i64 40)
+test/Transforms/InstCombine/deref-alloc-fns.ll:  %call = tail call noalias ptr @malloc(i64 40)
+test/Transforms/InstCombine/deref-alloc-fns.ll:define noalias ptr @malloc_constant_size2() {
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-LABEL: @malloc_constant_size2(
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias dereferenceable_or_null(40) ptr @malloc(i64 40)
+test/Transforms/InstCombine/deref-alloc-fns.ll:  %call = tail call noalias dereferenceable_or_null(80) ptr @malloc(i64 40)
+test/Transforms/InstCombine/deref-alloc-fns.ll:define noalias ptr @malloc_constant_size3() {
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-LABEL: @malloc_constant_size3(
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias dereferenceable(80) dereferenceable_or_null(40) ptr @malloc(i64 40)
+test/Transforms/InstCombine/deref-alloc-fns.ll:  %call = tail call noalias dereferenceable(80) ptr @malloc(i64 40)
+test/Transforms/InstCombine/deref-alloc-fns.ll:define noalias ptr @malloc_constant_zero_size() {
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-LABEL: @malloc_constant_zero_size(
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias ptr @malloc(i64 0)
+test/Transforms/InstCombine/deref-alloc-fns.ll:  %call = tail call noalias ptr @malloc(i64 0)
+test/Transforms/InstCombine/deref-alloc-fns.ll:define ptr @my_malloc_constant_size() {
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-LABEL: @my_malloc_constant_size(
+test/Transforms/InstCombine/deref-alloc-fns.ll:; CHECK-NEXT:    [[CALL:%.*]] = call dereferenceable_or_null(32) ptr @my_malloc(i64 32)
+test/Transforms/InstCombine/deref-alloc-fns.ll:  %call = call ptr @my_malloc(i64 32)
+test/Transforms/InstCombine/badmalloc.ll:declare noalias ptr @malloc(i64) nounwind allockind("alloc,uninitialized") "alloc-family"="malloc"
+test/Transforms/InstCombine/badmalloc.ll:declare void @free(ptr) allockind("free") "alloc-family"="malloc"
+test/Transforms/InstCombine/badmalloc.ll:  %A = call noalias ptr @malloc(i64 4) nounwind
+test/Transforms/InstCombine/badmalloc.ll:; CHECK: @malloc
+test/Transforms/InstCombine/badmalloc.ll:  %A = call noalias ptr @malloc(i64 4) nounwind
+test/Transforms/InstCombine/malloc-free-addrspace.ll:define i64 @remove_malloc() addrspace(200) {
+test/Transforms/InstCombine/malloc-free-addrspace.ll:; CHECK-LABEL: define {{[^@]+}}@remove_malloc() addrspace(200) {
+test/Transforms/InstCombine/malloc-free-addrspace.ll:  %call = call align 16 ptr addrspace(200) @malloc(i64 4)
+test/Transforms/InstCombine/malloc-free-addrspace.ll:declare noalias ptr addrspace(200) @calloc(i64, i64) addrspace(200)  nounwind allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc-free-addrspace.ll:declare noalias ptr addrspace(200) @malloc(i64) addrspace(200) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc-free-addrspace.ll:declare noalias ptr addrspace(200) @aligned_alloc(i64, i64) addrspace(200) allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
+test/Transforms/InstCombine/malloc-free-addrspace.ll:declare void @free(ptr addrspace(200)) addrspace(200) allockind("free") "alloc-family"="malloc"
+test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll:	%tmp1 = call ptr @malloc( i32 10 ) nounwind 		; <ptr> [#uses=5]
+test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll:declare ptr @malloc(i32) nounwind 
+test/Transforms/InstCombine/compare-unescaped.ll:declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; we cannot remove the alloc site: call to malloc
+test/Transforms/InstCombine/compare-unescaped.ll:; The comparison should fold to false irrespective of whether the call to malloc can be elided or not
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 24)
+test/Transforms/InstCombine/compare-unescaped.ll:define i1 @compare_distinct_mallocs() {
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-LABEL: @compare_distinct_mallocs(
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; call to malloc and the bitcast instructions are elided after that since there are no uses of the malloc
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; The malloc call for %m cannot be elided since it is used in the call to function f.
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; The malloc call for %m cannot be elided since it is used in the call to function f.
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; a malloc call, but we can't simultanious assume two different ones.  As a
+test/Transforms/InstCombine/compare-unescaped.ll:; the same point that applies to allocas, applied to noaiias/malloc.
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:define i1 @two_nonnull_mallocs() {
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-LABEL: @two_nonnull_mallocs(
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call nonnull ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call nonnull ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:define i1 @two_nonnull_mallocs2() {
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-LABEL: @two_nonnull_mallocs2(
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call nonnull ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call nonnull ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:define i1 @two_nonnull_mallocs_hidden() {
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-LABEL: @two_nonnull_mallocs_hidden(
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[M:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %m = call nonnull ptr @malloc(i64 4)
+test/Transforms/InstCombine/compare-unescaped.ll:  %n = call nonnull ptr @malloc(i64 4)
+test/Transforms/NewGVN/nonescaping-malloc-xfail.ll:; alias the malloc'd value %tmp.i20.i.i, which it can do since %tmp7.i
+test/Transforms/NewGVN/nonescaping-malloc-xfail.ll:declare noalias ptr @malloc(i64) nounwind
+test/Transforms/NewGVN/nonescaping-malloc-xfail.ll:  %tmp.i20.i.i = tail call noalias ptr @malloc(i64 %tmp8.i.i) nounwind
+test/Transforms/NewGVN/calloc-load-removal.ll:declare noalias ptr @calloc(i64, i64) mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
+test/Transforms/NewGVN/malloc-load-removal.ll:declare ptr @malloc(i64) nounwind allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
+test/Transforms/NewGVN/malloc-load-removal.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i64 100) #[[ATTR2:[0-9]+]]
+test/Transforms/NewGVN/malloc-load-removal.ll:  %call = tail call ptr @malloc(i64 100) nounwind
+test/Transforms/NewGVN/malloc-load-removal.ll:declare ptr @aligned_alloc(i64 allocalign, i64) nounwind allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
+test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll:; Test to make sure malloc's bitcast does not block detection of a store
+test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll:; CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @malloc(i64 [[MUL]])
+test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll:  %1 = tail call ptr @malloc(i64 %mul)
+test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll:declare noalias ptr @malloc(i64)
+test/Transforms/InstSimplify/call.ll:define ptr @malloc_can_return_null() {
+test/Transforms/InstSimplify/call.ll:; CHECK-LABEL: @malloc_can_return_null(
+test/Transforms/InstSimplify/call.ll:; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias ptr @malloc(i64 8)
+test/Transforms/InstSimplify/call.ll:  %call = tail call noalias ptr @malloc(i64 8)
+test/Transforms/InstSimplify/call.ll:declare noalias ptr @malloc(i64)
+test/Transforms/InstSimplify/call.ll:; CHECK-NEXT:    [[A:%.*]] = call noalias ptr @malloc(i64 8)
+test/Transforms/InstSimplify/call.ll:  %a = call noalias ptr @malloc(i64 8)
+test/Transforms/MetaRenamer/metarenamer.ll:declare noalias ptr @malloc(i64)
+test/Transforms/MetaRenamer/metarenamer.ll:; CHECK-NEXT:    [[TMP:%.*]] = call ptr @malloc(i64 23)
+test/Transforms/MetaRenamer/metarenamer.ll:  %x = call ptr @malloc(i64 23)
+test/Transforms/JumpThreading/phi-eq.ll:declare noalias ptr @g_malloc(i64)
+test/Transforms/JumpThreading/phi-eq.ll:  %call.i7 = call noalias ptr @g_malloc(i64 16) nounwind
+test/Transforms/MemCpyOpt/stackrestore.ll:; CHECK-NEXT:    [[HEAP:%.*]] = call ptr @malloc(i32 9)
+test/Transforms/MemCpyOpt/stackrestore.ll:  %heap = call ptr @malloc(i32 9)
+test/Transforms/MemCpyOpt/stackrestore.ll:; CHECK-NEXT:    [[HEAP:%.*]] = call ptr @malloc(i32 9)
+test/Transforms/MemCpyOpt/stackrestore.ll:  %heap = call ptr @malloc(i32 9)
+test/Transforms/MemCpyOpt/stackrestore.ll:declare ptr @malloc(i32)
+test/Transforms/MemCpyOpt/memmove.ll:; CHECK-NEXT:    [[MALLOCCALL:%.*]] = tail call ptr @malloc(i32 [[TRUNC]])
+test/Transforms/MemCpyOpt/memmove.ll:  %malloccall = tail call ptr @malloc(i32 %trunc)
+test/Transforms/MemCpyOpt/memmove.ll:  tail call void @llvm.memmove.p0.p0.i64(ptr %malloccall, ptr %src, i64 13, i1 false)
+test/Transforms/MemCpyOpt/memmove.ll:  ret ptr %malloccall
+test/Transforms/MemCpyOpt/memmove.ll:declare noalias ptr @malloc(i32)
+test/Transforms/MemCpyOpt/preserve-memssa.ll:declare ptr @malloc(i64)
+test/Transforms/MemCpyOpt/preserve-memssa.ll:; CHECK-NEXT:    [[CALL_I_I_I:%.*]] = tail call ptr @malloc(i64 20)
+test/Transforms/MemCpyOpt/preserve-memssa.ll:  %call.i.i.i = tail call ptr @malloc(i64 20)
+test/Transforms/MemCpyOpt/memcpy.ll:  %A = tail call ptr @malloc(i32 10)
+test/Transforms/MemCpyOpt/memcpy.ll:  %C = tail call ptr @malloc(i32 10)
+test/Transforms/MemCpyOpt/memcpy.ll:declare noalias ptr @malloc(i32) willreturn allockind("alloc,uninitialized") allocsize(0)
+test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll:define void @test_malloc_with_lifetimes(ptr %result) {
+test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll:; CHECK-LABEL: @test_malloc_with_lifetimes(
+test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll:; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 16)
+test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll:  %a = call ptr @malloc(i64 16)
+test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll:; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 16)
+test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll:  %a = call ptr @malloc(i64 16)
+test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll:declare ptr @malloc(i64)
+test/Transforms/MemCpyOpt/aggregate-type-crash.ll:declare noalias ptr @my_malloc(ptr) #0
+test/Transforms/MemCpyOpt/aggregate-type-crash.ll:; CHECK-NEXT:    [[TMP2:%.*]] = call ptr @my_malloc(ptr [[TMP0]])
+test/Transforms/MemCpyOpt/aggregate-type-crash.ll:  %2 = call ptr @my_malloc(ptr %0)
+test/Transforms/SCCP/conditions-iter-order.ll:declare noalias ptr @malloc(i64)
+test/Transforms/SCCP/conditions-iter-order.ll:; CHECK-NEXT:    [[TMP:%.*]] = call ptr @malloc(i64 10368)
+test/Transforms/SCCP/conditions-iter-order.ll:  %tmp = call ptr @malloc(i64 10368)
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll:declare noalias ptr @malloc(i64 noundef) #0
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll:define i64 @select_malloc_size(i1 %cond) {
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll:; CHECK-LABEL: @select_malloc_size(
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll:; CHECK-NEXT:    [[PTR:%.*]] = call noalias ptr @malloc(i64 noundef [[SIZE]])
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-range.ll:  %ptr = call noalias ptr @malloc(i64 noundef %size)
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll:declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr allocsize(0)
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll:; CHECK-NEXT:    [[MALLOCED:%.*]] = call noalias dereferenceable_or_null(8) ptr @malloc(i64 noundef 8)
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll:  %malloced = call noalias dereferenceable_or_null(8) ptr @malloc(i64 noundef 8)
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll:  %p = phi ptr [ %malloced, %if.else ], [ @buffer, %entry ]
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll:; CHECK-NEXT:    [[MALLOCED:%.*]] = call noalias dereferenceable_or_null(8) ptr @malloc(i64 noundef 8)
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll:  %malloced = call noalias dereferenceable_or_null(8) ptr @malloc(i64 noundef 8)
+test/Transforms/LowerConstantIntrinsics/builtin-object-size-phi.ll:  %p = phi ptr [ %malloced, %if.else ], [ @buffer, %entry ]
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:declare ptr @malloc(i64) allocsize(0)
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:define i64 @test_objectsize_malloc() {
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:; CHECK-LABEL: @test_objectsize_malloc(
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:; CHECK-NEXT:    [[PTR:%.*]] = call ptr @malloc(i64 16)
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:  %ptr = call ptr @malloc(i64 16)
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:define i64 @large_malloc() {
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:; CHECK-LABEL: @large_malloc(
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:; CHECK-NEXT:    [[OBJ:%.*]] = call ptr @malloc(i64 -9223372036854775808)
+test/Transforms/LowerConstantIntrinsics/objectsize_basic.ll:  %obj = call ptr @malloc(i64 9223372036854775808)
+test/Feature/recursivetype.ll:;   *L = (list*)malloc(sizeof(list));
+test/Feature/recursivetype.ll:declare ptr @malloc(i32)
+test/Feature/recursivetype.ll:        %reg111 = call ptr @malloc( i32 16 )            ; <ptr> [#uses=3]
+test/Feature/OperandBundles/dse.ll:declare noalias ptr @malloc(i32) nounwind
+test/Feature/OperandBundles/dse.ll:  %m = call ptr @malloc(i32 24)
+test/Feature/OperandBundles/dse.ll:  %m = call ptr @malloc(i32 24)
+test/Feature/OperandBundles/dse.ll:  %m = call ptr @malloc(i32 24)
+test/Feature/OperandBundles/dse.ll:  %m = call ptr @malloc(i32 24)
+test/DebugInfo/X86/debug-loc-asan.mir:# The address of the (potentially now malloc'ed) alloca ends up
+test/DebugInfo/X86/debug-loc-asan.mir:    %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0)
+test/DebugInfo/X86/debug-loc-asan.mir:  declare i64 @__asan_stack_malloc_0(i64, i64)
+test/DebugInfo/X86/debug-loc-asan.mir:    CALL64pcrel32 @__asan_stack_malloc_0, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit killed $rsi, implicit-def $rax
+test/DebugInfo/X86/dbg_value_direct.ll:declare i64 @__asan_stack_malloc(i64, i64)
+grep: test/DebugInfo/PDB/Inputs/big-read.pdb: binary file matches
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/PDB/pdbdump-headers.test:BIG-NEXT:              - (MD5: 928553F8BA198C9030B65FA10B6B3DD2) f:\dd\externalapis\unifiedcrt\inc\malloc.h
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:  %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0), !dbg !39
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:  %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0), !dbg !48
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_0(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_1(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_2(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_3(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_4(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_5(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_6(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_7(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_8(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_9(i64, i64)
+test/DebugInfo/Generic/incorrect-variable-debugloc.ll:declare i64 @__asan_stack_malloc_10(i64, i64)
+grep: test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64: binary file matches
+test/Bindings/OCaml/core.ml:  group "malloc/free"; begin
+test/Bindings/OCaml/core.ml:      (* CHECK: call{{.*}}@malloc(i32 ptrtoint
+test/Bindings/OCaml/core.ml:       * CHECK: call{{.*}}@malloc(i32 %
+test/Bindings/OCaml/core.ml:      let m1 = (build_malloc (pointer_type context) "m1"
+test/Bindings/OCaml/core.ml:      ignore (build_array_malloc i32_type p1 "m2" (builder_at_end context bb1));
+grep: test/tools/sancov/Inputs/test-linux_x86_64: binary file matches
+grep: test/tools/sancov/Inputs/test-linux_android_aarch64: binary file matches
+grep: test/tools/sancov/Inputs/test-windows_x86_64: binary file matches
+grep: test/tools/sancov/Inputs/test-darwin_x86_64: binary file matches
+grep: test/tools/llvm-cov/Inputs/binary-formats.v6.linux64l: binary file matches
+grep: test/tools/llvm-cov/Inputs/multiple_objects_not_all_instrumented/instrumented: binary file matches
+grep: test/tools/llvm-cov/Inputs/binary-formats.v2.linux32l: binary file matches
+grep: test/tools/llvm-cov/Inputs/binary-formats.v6.wasm32: binary file matches
+grep: test/tools/llvm-cov/Inputs/binary-formats.v2.linux64l: binary file matches
+grep: test/tools/llvm-cov/Inputs/binary-formats.v3.macho64l: binary file matches
+grep: test/tools/llvm-cov/Inputs/binary-formats.v1.linux64l: binary file matches
+test/tools/llvm-tli-checker/ps4-tli-check.yaml:  - Name:            malloc
+grep: test/tools/llvm-xray/X86/Inputs/elf64-example.bin: binary file matches
+grep: test/tools/llvm-xray/X86/Inputs/elf64-sample-o2.bin: binary file matches
+grep: test/tools/llvm-xray/X86/Inputs/elf64-pie.bin: binary file matches
+grep: test/tools/llvm-xray/X86/Inputs/elf64-badentrysizes.bin: binary file matches
+grep: test/tools/llvm-xray/X86/Inputs/instr-map-mach.o: binary file matches
+test/tools/llvm-profgen/profile-density.test:	int *array = malloc(size * sizeof(int));
+test/tools/llvm-profgen/inline-noprobe2.test:	int *array = malloc(size * sizeof(int));
+test/tools/llvm-profgen/cold-profile-trimming-symbolized.test:	int *array = malloc(size * sizeof(int));
+test/tools/llvm-profgen/fs-discriminator-probe.test:	int *array = malloc(size * sizeof(int));
+grep: test/tools/llvm-profgen/Inputs/coff-profile.exe: binary file matches
+grep: test/tools/llvm-profgen/Inputs/coroutine.perfbin: binary file matches
+grep: test/tools/llvm-profgen/Inputs/fs-discriminator.perfbin: binary file matches
+grep: test/tools/llvm-profgen/Inputs/fs-discriminator-probe.perfbin: binary file matches
+grep: test/tools/llvm-profgen/Inputs/inline-noprobe2.perfbin: binary file matches
+grep: test/tools/llvm-profgen/Inputs/multi-load-segs.perfbin: binary file matches
+test/tools/llvm-profgen/cold-profile-trimming.test:	int *array = malloc(size * sizeof(int));
+test/tools/llvm-profgen/fs-discriminator.test:	int *array = malloc(size * sizeof(int));
+test/tools/llvm-profgen/pseudoprobe-decoding-discriminator.test:	int *array = malloc(size * sizeof(int));
+test/tools/llvm-objdump/MachO/bind.test:# CHECK-NEXT:__DATA   __data             0x00001010 pointer         0 libSystem        _malloc
+test/tools/llvm-objdump/MachO/lazy-bind.test:# CHECK-NEXT:__DATA   __la_symbol_ptr    0x100001020 libSystem        _malloc
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-bad-export-info-malformed-uleb128_too_big: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/bind.macho-x86_64: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/lazy-bind.macho-x86_64: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-children-count-byte: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/bind2.macho-x86_64: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-import-name-end: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-bad-export-info-malformed-uleb128: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-bad-kind: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-import-name-start: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-not-export-node: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-node-loop: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-edge-string-end: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-bad-library-ordinal: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/exports-trie.macho-x86_64: binary file matches
+grep: test/tools/llvm-objdump/MachO/Inputs/macho-trie-export-info-size-too-big: binary file matches
+test/tools/llvm-objdump/MachO/exports-trie.test:# CHECK-NEXT:[re-export] _malloc (from libSystem)
+test/tools/llvm-objdump/MachO/bind2.test:# CHECK: __DATA   __data             0x00001008 pointer         0 libSystem        _malloc
+test/tools/llvm-objdump/MachO/bind2.test:# CHECK: __DATA   __data             0x00001050 pointer         0 libSystem        _malloc
+test/tools/llvm-objdump/MachO/bind2.test:# CHECK: __DATA   __data             0x00001458 pointer         0 libSystem        _malloc
+test/tools/llvm-dwarfutil/ELF/X86/dwarf5-macro.test:#MACRO-NEXT:           DW_MACRO_define_str{{[px]}} - lineno: 281 macro: __attribute_malloc__ __attribute__ ((__malloc__))
+test/tools/llvm-dwarfutil/ELF/X86/dwarf5-macro.test:#MACRO-NEXT:           DW_MACRO_define_str{{[px]}} - lineno: 281 macro: __attribute_malloc__ __attribute__ ((__malloc__))
+test/tools/llvm-dwarfutil/ELF/X86/dwarf4-macro.test:#MACINFO-NEXT:           DW_MACINFO_define - lineno: 281 macro: __attribute_malloc__ __attribute__ ((__malloc__))
+test/tools/llvm-dwarfutil/ELF/X86/dwarf4-macro.test:#MACINFO-NEXT:           DW_MACINFO_define - lineno: 281 macro: __attribute_malloc__ __attribute__ ((__malloc__))
+grep: test/tools/llvm-dwarfutil/ELF/X86/Inputs/dwarf4-macro.out: binary file matches
+grep: test/tools/llvm-dwarfutil/ELF/X86/Inputs/dwarf5-macro.out: binary file matches
+test/tools/llvm-profdata/memprof-basic.test:We expect 2 MIB entries, 1 each for the malloc calls in the program. Any
+grep: test/tools/llvm-profdata/Inputs/inline.memprofexe: binary file matches
+grep: test/tools/llvm-profdata/Inputs/basic_v3.memprofexe: binary file matches
+grep: test/tools/llvm-profdata/Inputs/buildid.memprofexe: binary file matches
+grep: test/tools/llvm-profdata/Inputs/multi.memprofexe: binary file matches
+grep: test/tools/llvm-profdata/Inputs/basic.memprofexe: binary file matches
+test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh:  char *x = (char *)malloc(10);
+test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh:  x = (char *)malloc(10);
+test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh:  char *ptr = (char*) malloc(x);
+test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh:  char *x = (char *)malloc(10);
+test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh:  x = (char *)malloc(10);
+grep: test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe: binary file matches
+grep: test/tools/llvm-profdata/Inputs/pic.memprofexe: binary file matches
+grep: test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe: binary file matches
+test/tools/llvm-profdata/memprof-basic_v3.test:We expect 2 MIB entries, 1 each for the malloc calls in the program. Any
+test/tools/llvm-profdata/memprof-multi.test:We expect 2 MIB entries, 1 each for the malloc calls in the program.
+test/tools/dsymutil/X86/union-fwd-decl.test:  Container *c = (Container *)malloc(sizeof(Container));
+test/tools/dsymutil/X86/union-fwd-decl.test:  c->ivars = (Container_ivars *)malloc(sizeof(Container_ivars));
+test/tools/dsymutil/X86/linker-llvm-union-fwd-decl.test:  Container *c = (Container *)malloc(sizeof(Container));
+test/tools/dsymutil/X86/linker-llvm-union-fwd-decl.test:  c->ivars = (Container_ivars *)malloc(sizeof(Container_ivars));
+grep: test/tools/dsymutil/X86/Inputs/String/main.o: binary file matches
+grep: test/tools/dsymutil/X86/Inputs/String/foo1.o: binary file matches
+grep: test/tools/dsymutil/X86/Inputs/String/foo3.o: binary file matches
+grep: test/tools/dsymutil/X86/Inputs/String/foo2.o: binary file matches
+grep: test/tools/dsymutil/Inputs/private/tmp/union/container.o: binary file matches
+grep: test/tools/dsymutil/Inputs/private/tmp/union/a.out: binary file matches
+grep: test/tools/llvm-readobj/XCOFF/Inputs/xlc32-exec: binary file matches
+grep: test/tools/llvm-readobj/XCOFF/Inputs/needed-libs-64.o: binary file matches
+grep: test/tools/llvm-readobj/XCOFF/Inputs/xlc64-exec: binary file matches
+grep: test/tools/llvm-readobj/XCOFF/Inputs/needed-libs-32.o: binary file matches
+grep: test/tools/llvm-readobj/COFF/Inputs/export-arm.dll: binary file matches
+grep: test/tools/llvm-readobj/COFF/Inputs/export-x64.dll: binary file matches
+grep: test/tools/llvm-readobj/COFF/Inputs/export-x86.dll: binary file matches
+grep: test/tools/llvm-symbolizer/Inputs/fission-ranges.elf-x86_64: binary file matches
+grep: test/tools/llvm-symbolizer/pdb/Inputs/test.pdb: binary file matches
+test/Object/macho-bind-negative-skip.test:CHECK-NEXT: __DATA_CONST __got              0x100004008 pointer         0 libSystem        _malloc
+test/Object/macho-bind-negative-skip.test:CHECK-NEXT: __DATA       __data             0x100008030 pointer         0 libSystem        _malloc
+test/Object/macho-bind-negative-skip.test:CHECK-NEXT: __DATA       __data             0x100008028 pointer         0 libSystem        _malloc
+test/Object/macho-bind-negative-skip.test:CHECK-NEXT: __DATA       __data             0x100008020 pointer         0 libSystem        _malloc
+grep: test/Object/Inputs/coff_archive.lib: binary file matches
+test/Object/Inputs/MachO/bind-negative-skip.yaml:      Symbol:          _malloc
+test/Object/Inputs/MachO/bind-negative-skip.yaml:    - _malloc
+test/CodeGen/X86/2007-10-15-CoalescerCrash.ll:declare ptr @xmalloc(i64)
+test/CodeGen/X86/regalloc-reconcile-broken-hints.ll:declare noalias ptr @malloc(i64)
+test/CodeGen/X86/regalloc-reconcile-broken-hints.ll:; It will not survive the call to malloc otherwise.
+test/CodeGen/X86/regalloc-reconcile-broken-hints.ll:; CHECK: callq _malloc
+test/CodeGen/X86/regalloc-reconcile-broken-hints.ll:  %call = tail call ptr @malloc(i64 16)
+test/CodeGen/X86/remat-fold-load.ll:  %call4.i.i.i.i68 = call noalias ptr @malloc(i32 undef) nounwind
+test/CodeGen/X86/remat-fold-load.ll:  %call4.i.i.i.i89 = call noalias ptr @malloc(i32 %tmp10) nounwind
+test/CodeGen/X86/remat-fold-load.ll:declare noalias ptr @malloc(i32) nounwind
+test/CodeGen/X86/pic.ll:declare ptr @malloc(i32)
+test/CodeGen/X86/pic.ll:    %ptr = call ptr @malloc(i32 40)
+test/CodeGen/X86/pic.ll:; CHECK-I686:	calll	malloc at PLT
+test/CodeGen/X86/pic.ll:; CHECK-X32:	callq	malloc at PLT
+test/CodeGen/X86/sbb-false-dep.ll:define i32 @mallocbench_gs(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4) nounwind {
+test/CodeGen/X86/sbb-false-dep.ll:; CHECK-LABEL: mallocbench_gs:
+test/CodeGen/X86/sbb-false-dep.ll:; IDIOM-LABEL: mallocbench_gs:
+test/CodeGen/X86/2010-08-04-MingWCrash.ll:  %call = tail call ptr @malloc()
+test/CodeGen/X86/2010-08-04-MingWCrash.ll:declare noalias ptr @malloc()
+test/CodeGen/X86/2010-01-13-OptExtBug.ll:  %call = tail call ptr @_Z15uprv_malloc_4_2v()
+test/CodeGen/X86/2010-01-13-OptExtBug.ll:declare ptr @_Z15uprv_malloc_4_2v()
+test/CodeGen/X86/gep-expanded-vector.ll:define ptr @malloc_init_state(<64 x ptr> %tmp, i32 %ind) nounwind {
+test/CodeGen/X86/gep-expanded-vector.ll:; CHECK-LABEL: malloc_init_state:
+test/CodeGen/X86/swifterror.ll:declare ptr @malloc(i64)
+test/CodeGen/X86/swifterror.ll:; CHECK-APPLE-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-O0-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-i386-NEXT:    calll _malloc
+test/CodeGen/X86/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/X86/swifterror.ll:; CHECK-APPLE-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-O0-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-i386-NEXT:    calll _malloc
+test/CodeGen/X86/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/X86/swifterror.ll:; CHECK-APPLE-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-O0-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-i386-NEXT:    calll _malloc
+test/CodeGen/X86/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/X86/swifterror.ll:; CHECK-APPLE-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-O0-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-i386-NEXT:    calll _malloc
+test/CodeGen/X86/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/X86/swifterror.ll:; CHECK-APPLE-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-O0-NEXT:    callq _malloc
+test/CodeGen/X86/swifterror.ll:; CHECK-i386-NEXT:    calll _malloc
+test/CodeGen/X86/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/X86/tailcall-cgp-dup.ll:; CHECK-NEXT:    callq _malloc
+test/CodeGen/X86/tailcall-cgp-dup.ll:  %strcpy_ret_val = tail call noalias ptr @malloc(i64 %sz)
+test/CodeGen/X86/tailcall-cgp-dup.ll:declare noalias ptr @malloc(i64)
+test/CodeGen/X86/patchable-prologue-tailcall.ll:; CHECK-NEXT: jmp     malloc                          # TAILCALL
+test/CodeGen/X86/patchable-prologue-tailcall.ll:  %call = tail call ptr @malloc(i64 %count)
+test/CodeGen/X86/patchable-prologue-tailcall.ll:declare noalias ptr @malloc(i64) #0
+test/CodeGen/X86/patchable-prologue-tailcall.ll:attributes #0 = { allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" }
+test/CodeGen/X86/2009-06-15-not-a-tail-call.ll:declare fastcc ptr @_D3gcx2GC12mallocNoSyncMFmkZPv() nounwind
+test/CodeGen/X86/2009-06-15-not-a-tail-call.ll:; CHECK-NEXT:    calll _D3gcx2GC12mallocNoSyncMFmkZPv at PLT
+test/CodeGen/X86/2009-06-15-not-a-tail-call.ll:	%tmp6 = tail call fastcc ptr @_D3gcx2GC12mallocNoSyncMFmkZPv()		; <ptr> [#uses=2]
+test/CodeGen/SystemZ/cond-move-regalloc-hints.mir:    %tmp2 = tail call ptr @sre_malloc()
+test/CodeGen/SystemZ/cond-move-regalloc-hints.mir:    %tmp4 = tail call ptr @sre_malloc()
+test/CodeGen/SystemZ/cond-move-regalloc-hints.mir:    tail call void @malloc()
+test/CodeGen/SystemZ/cond-move-regalloc-hints.mir:  declare ptr @sre_malloc() #0
+test/CodeGen/SystemZ/cond-move-regalloc-hints.mir:  declare void @malloc() #0
+test/CodeGen/SystemZ/cond-move-regalloc-hints.mir:    CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
+test/CodeGen/SystemZ/cond-move-regalloc-hints.mir:    CallBRASL @sre_malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d
+test/CodeGen/SystemZ/cond-move-regalloc-hints.mir:    CallBRASL @malloc, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc
+test/CodeGen/SystemZ/swifterror.ll:declare ptr @malloc(i64)
+test/CodeGen/SystemZ/swifterror.ll:; CHECK: brasl %r14, malloc
+test/CodeGen/SystemZ/swifterror.ll:; CHECK-O0: brasl %r14, malloc
+test/CodeGen/SystemZ/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/SystemZ/swifterror.ll:; CHECK: brasl %r14, malloc
+test/CodeGen/SystemZ/swifterror.ll:; CHECK-O0: brasl %r14, malloc
+test/CodeGen/SystemZ/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/SystemZ/swifterror.ll:; CHECK: brasl %r14, malloc
+test/CodeGen/SystemZ/swifterror.ll:; CHECK-O0: brasl %r14, malloc
+test/CodeGen/SystemZ/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/SystemZ/swifterror.ll:; CHECK: brasl %r14, malloc
+test/CodeGen/SystemZ/swifterror.ll:; CHECK-O0: brasl %r14, malloc
+test/CodeGen/SystemZ/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/SystemZ/mixed-ptr-sizes.ll:;  qa_area* __ptr32 fap_asm_option_a = (qa_area*)__malloc31(sizeof(qa_area));
+test/CodeGen/SystemZ/mixed-ptr-sizes.ll:; the C code. __malloc31() returns a 64 bit pointer, thus the sequence
+test/CodeGen/SystemZ/mixed-ptr-sizes.ll:  %call = tail call ptr @__malloc31(i64 noundef 8)
+test/CodeGen/SystemZ/mixed-ptr-sizes.ll:;  extern char* __ptr32 domalloc(unsigned long);
+test/CodeGen/SystemZ/mixed-ptr-sizes.ll:; instead of __malloc31(). Note the different instruction sequence, because
+test/CodeGen/SystemZ/mixed-ptr-sizes.ll:  %call = tail call ptr addrspace(1) @domalloc(i64 noundef 8)
+test/CodeGen/SystemZ/mixed-ptr-sizes.ll:declare ptr @__malloc31(i64)
+test/CodeGen/SystemZ/mixed-ptr-sizes.ll:declare ptr addrspace(1) @domalloc(i64)
+test/CodeGen/Hexagon/regalloc-block-overlap.ll:declare void @halide_malloc() local_unnamed_addr #0
+test/CodeGen/Hexagon/regalloc-block-overlap.ll:  tail call void @halide_malloc()
+test/CodeGen/Hexagon/regalloc-block-overlap.ll:  tail call void @halide_malloc()
+test/CodeGen/Hexagon/expand-vstorerw-undef2.ll:declare noalias ptr @halide_malloc() local_unnamed_addr #0
+test/CodeGen/Hexagon/expand-vstorerw-undef2.ll:  %v3 = tail call ptr @halide_malloc()
+test/CodeGen/Hexagon/expand-vstorerw-undef2.ll:  %v5 = tail call ptr @halide_malloc()
+test/CodeGen/Hexagon/expand-vstorerw-undef2.ll:  %v7 = tail call ptr @halide_malloc()
+test/CodeGen/Hexagon/expand-vstorerw-undef2.ll:  %v9 = tail call ptr @halide_malloc()
+test/CodeGen/Hexagon/packetize-return-arg.ll:  %call = tail call ptr @malloc(i32 %add1) #1
+test/CodeGen/Hexagon/packetize-return-arg.ll:declare noalias ptr @malloc(i32) local_unnamed_addr #1
+test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll:  %0 = call noalias ptr @malloc() nounwind
+test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll:declare noalias ptr @malloc() nounwind
+test/CodeGen/ARM/2009-08-21-PostRAKill3.ll:  %malloccall = tail call ptr @malloc(i32 ptrtoint (ptr getelementptr (%struct.Village, ptr null, i32 1) to i32))
+test/CodeGen/ARM/2009-08-21-PostRAKill3.ll:  %0 = getelementptr %struct.Village, ptr %malloccall, i32 0, i32 3, i32 6, i32 0 ; <ptr> [#uses=1]
+test/CodeGen/ARM/2009-08-21-PostRAKill3.ll:  %1 = getelementptr %struct.Village, ptr %malloccall, i32 0, i32 3, i32 6, i32 2 ; <ptr> [#uses=1]
+test/CodeGen/ARM/2009-08-21-PostRAKill3.ll:  ret ptr %malloccall
+test/CodeGen/ARM/2009-08-21-PostRAKill3.ll:declare noalias ptr @malloc(i32)
+test/CodeGen/ARM/shifter_operand.ll:declare ptr @malloc(...)
+test/CodeGen/ARM/shifter_operand.ll:; CHECK-ARM-NEXT:    bl malloc
+test/CodeGen/ARM/shifter_operand.ll:; CHECK-THUMB-NEXT:    bl malloc
+test/CodeGen/ARM/shifter_operand.ll:  %0 = tail call ptr (...) @malloc(i32 undef) nounwind
+test/CodeGen/ARM/swifterror.ll:declare ptr @malloc(i64)
+test/CodeGen/ARM/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-O0-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-ANDROID-NEXT:    bl malloc
+test/CodeGen/ARM/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/ARM/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-O0-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-ANDROID-NEXT:    bl malloc
+test/CodeGen/ARM/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/ARM/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-O0-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-ANDROID-NEXT:    bl malloc
+test/CodeGen/ARM/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/ARM/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-O0-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-ANDROID-NEXT:    bl malloc
+test/CodeGen/ARM/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/ARM/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-O0-NEXT:    bl _malloc
+test/CodeGen/ARM/swifterror.ll:; CHECK-ANDROID-NEXT:    bl malloc
+test/CodeGen/ARM/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/ARM/2010-09-21-OptCmpBug.ll:declare noalias ptr @malloc(i32) nounwind
+test/CodeGen/ARM/2010-09-21-OptCmpBug.ll:  %3 = call noalias ptr @malloc(i32 %storemerge2.i) nounwind
+test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll:  %malloccall = tail call ptr @malloc(i32 %trunc)
+test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll:	%curr_ptr.0.reg2mem.0.i = phi ptr [ %malloccall, %cond_false99.i ], [ null, %bb42 ]		; <ptr> [#uses=2]
+test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll:declare noalias ptr @malloc(i32)
+test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll:; 'malloc' and 'free' calls take debug info from the next instruction.
+test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll:declare ptr @malloc(i32)
+test/CodeGen/WebAssembly/wasm-eh-em-sjlj-error.ll:declare ptr @malloc(i32)
+test/CodeGen/WebAssembly/lower-em-sjlj-alias.ll:; Tests if an alias to a function (here malloc) is correctly handled as a
+test/CodeGen/WebAssembly/lower-em-sjlj-alias.ll:@malloc = weak alias ptr (i32), ptr @dlmalloc
+test/CodeGen/WebAssembly/lower-em-sjlj-alias.ll:; CHECK-LABEL: @malloc_test
+test/CodeGen/WebAssembly/lower-em-sjlj-alias.ll:define void @malloc_test() {
+test/CodeGen/WebAssembly/lower-em-sjlj-alias.ll:; This is a dummy dlmalloc implemenation only to make compiler pass, because an
+test/CodeGen/WebAssembly/lower-em-sjlj-alias.ll:; alias (malloc) has to point an actual definition.
+test/CodeGen/WebAssembly/lower-em-sjlj-alias.ll:define ptr @dlmalloc(i32) {
+test/CodeGen/WebAssembly/lower-em-sjlj.ll:; CHECK-NOT: @malloc
+test/CodeGen/WebAssembly/lower-em-ehsjlj-multi-return.ll:declare ptr @malloc(i32)
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll:; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP21]], i64 [[TMP23]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-vector-ptrs.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll:; CHECK-NEXT:    [[TMP39:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP30]], i64 [[TMP23]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll:; Test to check malloc and free blocks are placed correctly when multiple
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll:; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP17]], i64 [[TMP19]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll:; CHECK-NEXT:    [[TMP39:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP30]], i64 [[TMP23]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll:; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll:; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll:; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll:; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll:; CHECK-NEXT:    [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll:; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP13]], i64 [[TMP15]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll:; CHECK-NEXT:    [[TMP24:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP27]], i64 [[TMP33]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll:; CHECK-NEXT:    [[TMP34:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP28]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll:; CHECK-NEXT:    [[TMP24:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP27]], i64 [[TMP33]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll:; CHECK-NEXT:    [[TMP34:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP28]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll:; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP13]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll:; Test to check malloc and free blocks are placed correctly when multiple
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll:; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP17]], i64 [[TMP19]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll:; CHECK-NEXT:    [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP24]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll:; CHECK-NEXT:    [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll:; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP13]], i64 [[TMP15]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP24]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll:; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP21]], i64 [[TMP23]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll:; CHECK-NEXT:    [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll:; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll:; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
+test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll:; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP11]])
+test/CodeGen/Thumb2/bug-subw.ll:declare external ptr @memalloc(i32, i32, i32)
+test/CodeGen/Thumb2/bug-subw.ll:	%7 = call ptr @memalloc(i32 8, i32 4, i32 0)
+test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll:  %18 = call  noalias ptr @malloc(i32 %17) nounwind ; <ptr> [#uses=1]
+test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll:declare noalias ptr @malloc(i32) nounwind
+test/CodeGen/AArch64/fast-isel-call-return.ll:; CHECK: bl xmalloc
+test/CodeGen/AArch64/fast-isel-call-return.ll:  %0 = call noalias ptr @xmalloc(i64 undef)
+test/CodeGen/AArch64/fast-isel-call-return.ll:declare noalias ptr @xmalloc(i64)
+test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll:declare noalias ptr @xmalloc(i64) optsize
+test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll:  %call34 = tail call noalias ptr @xmalloc(i64 %conv33) nounwind optsize
+test/CodeGen/AArch64/sms-order-physreg-deps.mir:  attributes #0 = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="a64fx" "target-features"="+aes,+complxnum,+crc,+fp-armv8,+fullfp16,+lse,+neon,+outline-atomics,+perfmon,+ras,+rdm,+sha2,+sve,+v8.1a,+v8.2a,+v8a,-fmv" }
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:declare ptr @malloc(i64)
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:; CHECK: malloc
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:; CHECK: malloc
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:; CHECK: malloc
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:; CHECK: malloc
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:; CHECK: malloc
+test/CodeGen/AArch64/GlobalISel/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/swifterror.ll:declare ptr @malloc(i64)
+test/CodeGen/AArch64/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-AARCH64-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-AARCH64-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-AARCH64-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/swifterror.ll:; CHECK-APPLE-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-AARCH64-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/AArch64/swifterror.ll:; CHECK-APPLE-AARCH64-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-AARCH64-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-APPLE-ARM64_32-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+test/CodeGen/AArch64/swifterror.ll:  %call = call ptr @malloc(i64 16)
+test/CodeGen/PowerPC/asym-regclass-copy.ll:  %call1 = tail call noalias ptr @malloc(i64 %mul) #1
+test/CodeGen/PowerPC/asym-regclass-copy.ll:declare noalias ptr @malloc(i64) #0
+test/CodeGen/PowerPC/sms-phi-3.ll:declare ptr @malloc() local_unnamed_addr
+test/CodeGen/PowerPC/sms-phi-3.ll:; CHECK-NEXT:    bl malloc
+test/CodeGen/PowerPC/sms-phi-3.ll:; CHECK-NEXT:    bl malloc
+test/CodeGen/PowerPC/sms-phi-3.ll:  %2 = tail call noalias ptr @malloc()
+test/CodeGen/PowerPC/sms-phi-3.ll:  %4 = tail call noalias ptr @malloc()
+test/CodeGen/PowerPC/register-pressure.ll:; CHECK-NEXT:    bl malloc
+test/CodeGen/PowerPC/register-pressure.ll:  %10 = call ptr @malloc(i32 noundef %9)
+test/CodeGen/PowerPC/register-pressure.ll:declare dso_local ptr @malloc(i32 noundef) #1
+test/CodeGen/PowerPC/out-of-range-dform.ll:; CHECK-P9-NEXT:    bl malloc
+test/CodeGen/PowerPC/out-of-range-dform.ll:  %i = tail call noalias dereferenceable_or_null(6451600) ptr @malloc()
+test/CodeGen/PowerPC/out-of-range-dform.ll:declare ptr @malloc() local_unnamed_addr
+test/CodeGen/M68k/CodeModel/Small/small-static.ll:declare ptr @malloc(i32)
+test/CodeGen/M68k/CodeModel/Small/small-static.ll:; CHECK-NEXT:    jsr malloc
+test/CodeGen/M68k/CodeModel/Small/small-static.ll:    %ptr = call ptr @malloc(i32 40)
+test/CodeGen/M68k/CodeModel/Small/small-pic.ll:declare ptr @malloc(i32)
+test/CodeGen/M68k/CodeModel/Small/small-pic.ll:; CHECK-NEXT:    jsr (malloc at PLT,%pc)
+test/CodeGen/M68k/CodeModel/Small/small-pic.ll:    %ptr = call ptr @malloc(i32 40)
+test/CodeGen/M68k/CodeModel/Large/large-pic.ll:declare ptr @malloc(i32)
+test/CodeGen/M68k/CodeModel/Large/large-pic.ll:; CHECK-NEXT:    jsr (malloc at PLT,%pc)
+test/CodeGen/M68k/CodeModel/Large/large-pic.ll:    %ptr = call ptr @malloc(i32 40)
+test/CodeGen/M68k/CodeModel/Large/large-static.ll:declare ptr @malloc(i32)
+test/CodeGen/M68k/CodeModel/Large/large-static.ll:; CHECK-NEXT:    jsr malloc
+test/CodeGen/M68k/CodeModel/Large/large-static.ll:    %ptr = call ptr @malloc(i32 40)
+test/CodeGen/M68k/CodeModel/Medium/medium-pic.ll:declare ptr @malloc(i32)
+test/CodeGen/M68k/CodeModel/Medium/medium-pic.ll:; CHECK-NEXT:    jsr (malloc at PLT,%pc)
+test/CodeGen/M68k/CodeModel/Medium/medium-pic.ll:    %ptr = call ptr @malloc(i32 40)
+test/CodeGen/M68k/CodeModel/Medium/medium-static.ll:declare ptr @malloc(i32)
+test/CodeGen/M68k/CodeModel/Medium/medium-static.ll:; CHECK-NEXT:    jsr malloc
+test/CodeGen/M68k/CodeModel/Medium/medium-static.ll:    %ptr = call ptr @malloc(i32 40)
+test/Analysis/GlobalsModRef/indirect-global.ll:declare noalias ptr @malloc(i32) allockind("alloc,uninitialized") allocsize(0) inaccessiblememonly
+test/Analysis/GlobalsModRef/indirect-global.ll:define void @malloc_init() {
+test/Analysis/GlobalsModRef/indirect-global.ll:; CHECK-LABEL: @malloc_init(
+test/Analysis/GlobalsModRef/indirect-global.ll:; CHECK-NEXT:    [[A:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i32 4)
+test/Analysis/GlobalsModRef/indirect-global.ll:  %a = call ptr @malloc(i32 4)
+test/Analysis/GlobalsModRef/indirect-global.ll:define i32 @malloc_test(ptr %P) {
+test/Analysis/GlobalsModRef/indirect-global.ll:; CHECK-LABEL: @malloc_test(
+test/Analysis/KernelInfo/openmp/nvptx.ll:attributes #14 = { convergent mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" }
+test/Analysis/MemorySSA/pr116227.ll:declare ptr @malloc() allockind("alloc,uninitialized")
+test/Analysis/MemorySSA/pr116227.ll:; CHECK-NEXT:  %call.i = call ptr @malloc()
+test/Analysis/MemorySSA/pr116227.ll:  %call.i = call ptr @malloc()
+test/Analysis/MemorySSA/pr116227.ll:; CHECK-NEXT: %call.i = call ptr @malloc()
+test/Analysis/MemorySSA/pr116227.ll:  %call.i = call ptr @malloc()
+test/Analysis/BasicAA/zext.ll:  %1 = tail call ptr @malloc(i64 120)
+test/Analysis/BasicAA/zext.ll:  %1 = tail call ptr @malloc(i64 120)
+test/Analysis/BasicAA/zext.ll:  %m0 = tail call ptr @malloc(i64 120)
+test/Analysis/BasicAA/zext.ll:  %1 = tail call ptr @malloc(i64 120)
+test/Analysis/BasicAA/zext.ll:  %call = tail call ptr @malloc(i64 %mul)
+test/Analysis/BasicAA/zext.ll:declare noalias ptr @malloc(i64)
+test/Analysis/ScalarEvolution/malloc.ll:; CHECK-NEXT:    %alloc = tail call dereferenceable(64) ptr @malloc(i64 64)
+test/Analysis/ScalarEvolution/malloc.ll:  %alloc = tail call dereferenceable(64) ptr @malloc(i64 64)
+test/Analysis/ScalarEvolution/malloc.ll:; CHECK-NEXT:    %alloc = tail call dereferenceable_or_null(64) ptr @malloc(i64 64)
+test/Analysis/ScalarEvolution/malloc.ll:  %alloc = tail call dereferenceable_or_null(64) ptr @malloc(i64 64)
+test/Analysis/ScalarEvolution/malloc.ll:; CHECK-NEXT:    %alloc = call nonnull ptr @malloc(i64 -1)
+test/Analysis/ScalarEvolution/malloc.ll:  %alloc = call nonnull ptr @malloc(i64 -1)
+test/Analysis/ScalarEvolution/malloc.ll:declare noalias noundef ptr @malloc(i64 noundef) allockind("alloc,uninitialized") allocsize(0)
+cmake/config-ix.cmake:  check_include_file(malloc/malloc.h HAVE_MALLOC_MALLOC_H)
+cmake/config-ix.cmake:check_symbol_exists(mallctl malloc_np.h HAVE_MALLCTL)
+cmake/config-ix.cmake:check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
+cmake/config-ix.cmake:check_symbol_exists(mallinfo2 malloc.h HAVE_MALLINFO2)
+cmake/config-ix.cmake:check_symbol_exists(malloc_zone_statistics malloc/malloc.h
+examples/BrainF/BrainF.cpp:  //%arr = malloc i8, i32 %d
+examples/ExceptionDemo/ExceptionDemo.cpp:  OurException *ret = (OurException*) memset(malloc(size), 0, size);
+tools/llvm-c-test/echo.cpp:              safe_malloc(ParamCount * sizeof(LLVMTypeRef)));
+tools/llvm-c-test/echo.cpp:                       safe_malloc(NumHandlers * sizeof(LLVMBasicBlockRef)));
+tools/llvm-c-test/echo.cpp:              safe_malloc(OperandCount * sizeof(LLVMValueRef)));
+tools/llvm-c-test/attributes.c:            (LLVMAttributeRef *)malloc(AttrCount * sizeof(LLVMAttributeRef));
+tools/llvm-c-test/attributes.c:              Attrs = (LLVMAttributeRef *)malloc(
+tools/remarks-shlib/CMakeLists.txt:    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -INCLUDE:malloc")
+tools/llvm-shlib/CMakeLists.txt:    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}-INCLUDE:malloc")
+tools/llvm-gpu-loader/amdhsa.cpp:        // Register RPC callbacks for the malloc and free functions on HSA.
+tools/llvm-gpu-loader/amdhsa.cpp:        auto malloc_handler = [&](size_t size) -> void * {
+tools/llvm-gpu-loader/amdhsa.cpp:                handle_server<32>(*server, index, malloc_handler, free_handler);
+tools/llvm-gpu-loader/amdhsa.cpp:                handle_server<64>(*server, index, malloc_handler, free_handler);
+tools/llvm-gpu-loader/nvptx.cpp:        auto malloc_handler = [&](size_t size) -> void * {
+tools/llvm-gpu-loader/nvptx.cpp:              handle_server<32>(*server, index, malloc_handler, free_handler);
+include/llvm/IR/Instruction.h:  /// Note that this does not consider malloc and alloca to have side
+include/llvm/IR/InstVisitor.h:/// For example, here is a visitor that counts the number of malloc
+include/llvm/IR/IRBuilder.h:  /// CreateMalloc - Generate the IR for a call to malloc:
+include/llvm/IR/IRBuilder.h:  /// 1. Compute the malloc call's argument as the specified type's size,
+include/llvm/IR/IRBuilder.h:  /// 2. Call malloc with that argument.
+include/llvm/IR/RuntimeLibcalls.def:// DSEPass can emit calloc if it finds a pair of malloc/memset
+include/llvm/Config/config.h.cmake:/* Define to 1 if you have the <malloc/malloc.h> header file. */
+include/llvm/Config/config.h.cmake:/* Define to 1 if you have the `malloc_zone_statistics' function. */
+include/llvm/Demangle/ItaniumDemangle.h:      auto *Tmp = static_cast<T *>(std::malloc(NewCap * sizeof(T)));
+include/llvm/ADT/SmallVector.h:  void *mallocForGrow(void *FirstEl, size_t MinSize, size_t TSize,
+include/llvm/ADT/SmallVector.h:  T *mallocForGrow(size_t MinSize, size_t &NewCapacity);
+include/llvm/ADT/SmallVector.h:    T *NewElts = mallocForGrow(NumElts, NewCapacity);
+include/llvm/ADT/SmallVector.h:    T *NewElts = mallocForGrow(0, NewCapacity);
+include/llvm/ADT/SmallVector.h:  T *NewElts = mallocForGrow(MinSize, NewCapacity);
+include/llvm/ADT/SmallVector.h:T *SmallVectorTemplateBase<T, TriviallyCopyable>::mallocForGrow(
+include/llvm/ADT/SmallVector.h:      SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
+include/llvm/ADT/SmallSet.h:/// maintained with no mallocs.  If the set gets large, we expand to using an
+include/llvm/ADT/SparseMultiSet.h:    // The Sparse array doesn't actually need to be initialized, so malloc
+include/llvm/ADT/SparseSet.h:    // The Sparse array doesn't actually need to be initialized, so malloc
+include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h:      WFR.R.Data.ValuePtr = (char *)malloc(WFR.R.Size);
+include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h:    char *Tmp = (char *)malloc(strlen(Msg) + 1);
+include/llvm/Transforms/Instrumentation/MemProfiler.h:/// essentially replaces malloc() and free() with custom implementations that
+include/llvm/Transforms/Utils/BuildLibCalls.h:  /// Emit a call to the malloc function.
+include/llvm/Support/PointerLikeTypeTraits.h:  /// Note, we assume here that void* is related to raw malloc'ed memory and
+include/llvm/Support/PointerLikeTypeTraits.h:  /// that malloc returns objects at least 4-byte aligned. However, this may be
+include/llvm/Support/PointerLikeTypeTraits.h:  /// wrong, or pointers may be from something other than malloc. In this case,
+include/llvm/Support/Compiler.h:#define LLVM_ATTRIBUTE_RETURNS_NOALIAS __attribute__((__malloc__))
+include/llvm/Support/Process.h:  /// by the process. This only counts the memory allocated via the malloc,
+include/llvm/Support/MemAlloc.h:LLVM_ATTRIBUTE_RETURNS_NONNULL inline void *safe_malloc(size_t Sz) {
+include/llvm/Support/MemAlloc.h:  void *Result = std::malloc(Sz);
+include/llvm/Support/MemAlloc.h:      return safe_malloc(1);
+include/llvm/Support/MemAlloc.h:      return safe_malloc(1);
+include/llvm/Support/MemAlloc.h:      return safe_malloc(1);
+include/llvm/Support/ErrorHandling.h:/// bad alloc error, e.g. failing malloc/calloc, is encountered by LLVM.
+include/llvm/Support/Allocator.h:/// object, which wraps malloc, to allocate memory, but it can be changed to
+include/llvm/Analysis/ValueTracking.h:/// for malloc and alloca because speculatively executing them might cause a
+include/llvm/Analysis/TargetLibraryInfo.def:/// hint are supported by the open source version of tcmalloc, see:
+include/llvm/Analysis/TargetLibraryInfo.def:/// https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h
+include/llvm/Analysis/TargetLibraryInfo.def:/// https://github.com/google/tcmalloc/blob/master/tcmalloc/malloc_extension.h
+include/llvm/Analysis/TargetLibraryInfo.def:/// They are implemented by tcmalloc, see source at
+include/llvm/Analysis/TargetLibraryInfo.def:/// https://github.com/google/tcmalloc/blob/master/tcmalloc/malloc_extension.h
+include/llvm/Analysis/TargetLibraryInfo.def:/// void *malloc(size_t size);
+include/llvm/Analysis/TargetLibraryInfo.def:TLI_DEFINE_ENUM_INTERNAL(malloc)
+include/llvm/Analysis/TargetLibraryInfo.def:TLI_DEFINE_STRING_INTERNAL("malloc")
+include/llvm/Analysis/TargetLibraryInfo.def:/// void *vec_malloc(size_t size);
+include/llvm/Analysis/TargetLibraryInfo.def:TLI_DEFINE_ENUM_INTERNAL(vec_malloc)
+include/llvm/Analysis/TargetLibraryInfo.def:TLI_DEFINE_STRING_INTERNAL("vec_malloc")
+include/llvm/Analysis/AliasAnalysis.h:///    NoAlias returns (e.g. calls to malloc)
+include/llvm/Analysis/MemoryBuiltins.h:/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
+include/llvm/Analysis/MemoryBuiltins.h:/// allocates memory similar to malloc or calloc.
+include/llvm/Analysis/MemoryBuiltins.h:/// allocates memory (either malloc, calloc, or strdup like).
+include/llvm/Analysis/MemoryBuiltins.h:/// malloc/realloc/calloc/free), return the identifier for its family
+unittests/ProfileData/MemProfTest.cpp:          {"malloc", 70, 57, 3, "memprof/memprof_malloc_linux.cpp"},
+unittests/ProfileData/MemProfTest.cpp:          {"malloc", 70, 57, 3, "memprof_malloc_linux.cpp"},
+unittests/Demangle/PartialDemangleTest.cpp:  char *Buf = static_cast<char *>(std::malloc(Size));
+unittests/Demangle/PartialDemangleTest.cpp:  char *Buf = static_cast<char *>(std::malloc(OriginalSize));
+unittests/Transforms/Coroutines/ExtraRematTest.cpp:      %alloc = call ptr @malloc(i32 %size)
+unittests/Transforms/Coroutines/ExtraRematTest.cpp:    declare noalias ptr @malloc(i32)
+unittests/Transforms/Coroutines/ExtraRematTest.cpp:      %alloc = call ptr @malloc(i32 %size)
+unittests/Transforms/Coroutines/ExtraRematTest.cpp:    declare noalias ptr @malloc(i32)
+unittests/Support/AllocatorTest.cpp:    void *MemBase = safe_malloc(Size + Alignment.value() - 1 + sizeof(void *));
+unittests/Support/AllocatorTest.cpp:    // Hold a pointer to the base so we can free the whole malloced block.
+unittests/Support/MemoryBufferTest.cpp:  // falling back to malloc for them causes a huge memory usage increase.
+unittests/Support/ManagedStatic.cpp:    void *stack = safe_malloc(n);
+unittests/Support/ManagedStatic.cpp:    void *Mem = safe_malloc(sizeof(int));
+unittests/Support/DynamicLibrary/CMakeLists.txt:    # /INCLUDE:malloc is there to force searching into LLVMSupport before libucrt
+unittests/Support/DynamicLibrary/CMakeLists.txt:    target_link_libraries(${NAME} ${llvm_libs} "-INCLUDE:malloc")
+unittests/Analysis/TargetLibraryInfoTest.cpp:      "declare i8* @malloc(i64)\n"
+unittests/Analysis/TargetLibraryInfoTest.cpp:      "declare i8* @vec_malloc(i64)\n"
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call1 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call2 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call3 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call4 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:declare dso_local noalias noundef i8* @malloc(i64 noundef)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:declare dso_local noalias noundef i8* @malloc(i64 noundef)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:declare dso_local noalias noundef i8* @malloc(i64 noundef)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:declare dso_local noalias noundef i8* @malloc(i64 noundef)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:declare dso_local noalias noundef i8* @malloc(i64 noundef)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40)
+unittests/Analysis/MemoryProfileInfoTest.cpp:declare dso_local noalias noundef i8* @malloc(i64 noundef)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call1 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40), !memprof !0
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call2 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40), !memprof !3
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call3 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40), !memprof !6
+unittests/Analysis/MemoryProfileInfoTest.cpp:declare dso_local noalias noundef i8* @malloc(i64 noundef)
+unittests/Analysis/MemoryProfileInfoTest.cpp:  %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40), !memprof !0
+unittests/Analysis/MemoryProfileInfoTest.cpp:declare dso_local noalias noundef i8* @malloc(i64 noundef)
+CMakeLists.txt:set(LLVM_ENABLE_RPMALLOC "" CACHE BOOL "Replace the CRT allocator with rpmalloc.")
+CMakeLists.txt:  # Override the C runtime allocator with the in-tree rpmalloc
+CMakeLists.txt:set(LLVM_INTEGRATED_CRT_ALLOC "${LLVM_INTEGRATED_CRT_ALLOC}" CACHE PATH "Replace the Windows CRT allocator with any of {rpmalloc|mimalloc|snmalloc}. Only works with CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded.")
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index bb6c2ec5ede77..4fb3936df653a 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5377,7 +5377,7 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
   Loops = loopsVec;
   EXPECT_EQ(Loops.size(), 2U);
   CanonicalLoopInfo *InputLoop = Loops.front();
-  CanonicalLoopInfo  *ScanLoop = Loops.back();
+  CanonicalLoopInfo *ScanLoop = Loops.back();
   Builder.restoreIP(ScanLoop->getAfterIP());
   InputLoop->assertOK();
   ScanLoop->assertOK();
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c68cae77b2f3d..03636f86215d7 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -47,8 +47,10 @@
 
 using namespace mlir;
 
-llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType; 
-llvm::OpenMPIRBuilder::InsertPointTy parallelAllocaIP;// TODO: change this alloca IP to point to originalvar allocaIP. ReductionDecl need to be linked to scan var.
+llvm::SmallDenseMap<llvm::Value *, llvm::Type *> ReductionVarToType;
+llvm::OpenMPIRBuilder::InsertPointTy
+    parallelAllocaIP; // TODO: change this alloca IP to point to originalvar
+                      // allocaIP. ReductionDecl need to be linked to scan var.
 namespace {
 static llvm::omp::ScheduleKind
 convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
diff --git a/offload/out b/offload/out
deleted file mode 100644
index 5df1a3ad98775..0000000000000
--- a/offload/out
+++ /dev/null
@@ -1,424 +0,0 @@
-grep: ./out: input file is also the output
-./test/offloading/fortran/target-map-nested-dtype-alloca-array.f90:!$omp target map(tofrom: one_l%nest%array_k)
-./test/offloading/fortran/target-map-nested-dtype-alloca-array.f90:!$omp end target
-./test/offloading/fortran/target-map-derived-type-full-1.f90:  !$omp target map(from:out) map(to:in)
-./test/offloading/fortran/target-map-derived-type-full-1.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: arg_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: arg_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: arg_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: local_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: local_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: local_alloc)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target enter data map(alloc: map_ptr)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(from: map_ptr)
-./test/offloading/fortran/target-map-pointer-scopes-enter-exit.f90:  !$omp target exit data map(delete: map_ptr)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target data map(ompx_hold, tofrom: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target exit data map(delete: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target map(present, tofrom: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target data
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target data map(tofrom: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target exit data map(delete: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp target map(present, tofrom: presence_check)
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target
-./test/offloading/fortran/target_map_ompx_hold.f90:!$omp end target data
-./test/offloading/fortran/target-map-nullary-pointer.f90:!$omp target map(Set)
-./test/offloading/fortran/target-map-nullary-pointer.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(5))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%elements(3))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(5)%elements(3:5))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(3:5))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%num_chars%number%float_elements(8))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%num_chars%number%float_elements(5:10))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp target map(tofrom: array_dtype(4)%internal_dtypes(3)%float_elements(4))
-./test/offloading/fortran/dtype-member-map-syntax-1.f90:  !$omp end target
-./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-1.f90:  !$omp target map(tofrom:scalar_arr1%break_1)
-./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-1.f90:  !$omp end target
-./test/offloading/fortran/target-map-nested-alloca-dtype-3d-alloca-array-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-nested-alloca-dtype-3d-alloca-array-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-member.f90:  !$omp target map(tofrom:scalar_arr%array_x, scalar_arr%array_y)
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-derived-type-full-2.f90:  !$omp target map(from:out) map(to:in)
-./test/offloading/fortran/target-map-derived-type-full-2.f90:  !$omp end target
-./test/offloading/fortran/target-map-double-nested-dtype-single-member.f90:!$omp target map(tofrom: top_dtype%nested%array_i2, top_dtype2%nested%array_j2)
-./test/offloading/fortran/target-map-double-nested-dtype-single-member.f90:!$omp end target
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: arg_alloc)
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: local_alloc)
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp target map(tofrom: map_ptr)
-./test/offloading/fortran/target-map-pointer-target-scopes.f90:  !$omp end target
-./test/offloading/fortran/basic-target-parallel-region.f90:   !$omp target parallel map(from: x)
-./test/offloading/fortran/basic-target-parallel-region.f90:   !$omp end target parallel
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:   !$omp target enter data map(alloc: A)
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp end target
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target exit data map(from: A)
-./test/offloading/fortran/target-map-enter-exit-allocatables.f90:    !$omp target exit data map(delete: A)
-./test/offloading/fortran/target-use-dev-ptr.f90:   !$omp target data map(tofrom: x) use_device_ptr(x)
-./test/offloading/fortran/target-use-dev-ptr.f90:   !$omp end target data
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target enter data map(to:my_instance, my_instance%values)
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp end target
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target exit data map(from:my_instance%values)
-./test/offloading/fortran/target-map-pointer-to-dtype-allocatable-member.f90:    !$omp target exit data map(release:my_instance)
-./test/offloading/fortran/target_map_present_fail.f90:!$omp target data map(present,alloc:arr)
-./test/offloading/fortran/target_map_present_fail.f90:!$omp target
-./test/offloading/fortran/target_map_present_fail.f90:!$omp end target
-./test/offloading/fortran/target_map_present_fail.f90:!$omp end target data
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp declare target link(arr1) enter(arr2)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp declare target link(scalar)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(tofrom:arr1, i, j)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(i, j)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target map(i, j)
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp target
-./test/offloading/fortran/declare-target-vars-in-target-region.f90:  !$omp end target
-./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp target map(tofrom: array_dtype(5))
-./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp end target
-./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp target map(tofrom: array_dtype(5))
-./test/offloading/fortran/dtype-array-constant-index-map.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-alloca-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
-./test/offloading/fortran/target-map-dtype-alloca-array-of-dtype.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-alloca-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
-./test/offloading/fortran/target-map-alloca-dtype-alloca-array-of-dtype.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-alloca-and-non-alloca-array.f90:    !$omp target map(tofrom: one_l%nest%array_i, one_l%nest%array_k)
-./test/offloading/fortran/target-map-nested-dtype-alloca-and-non-alloca-array.f90:    !$omp end target
-./test/offloading/fortran/target-map-first-common-block-member.f90:  !$omp target map(tofrom: var1)
-./test/offloading/fortran/target-map-first-common-block-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp target map(tofrom: var1)
-./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp end target
-./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp target map(tofrom: var1)
-./test/offloading/fortran/target-map-first-common-block-member.f90:!$omp end target
-./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-2.f90:  !$omp target map(tofrom:scalar_arr2%array_x(3:6))
-./test/offloading/fortran/target-map-two-dtype-mixed-implicit-explicit-capture-2.f90:  !$omp end target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target enter data map(alloc:sbuf31)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp end target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target update from(sbuf31)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target exit data map(delete:sbuf31)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target enter data map(to:p)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp end target
-./test/offloading/fortran/local-descriptor-map-regress.f90:!$omp target update from(p)
-./test/offloading/fortran/local-descriptor-map-regress.f90:!!$omp target exit data map(delete:p)
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target enter data map(to: x)
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target data use_device_addr(x)
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp end target data
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp target map(to: x) map(from: res1, res2) &
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp & has_device_addr(first_scalar_device_addr)
-./test/offloading/fortran/target-has-device-addr1.f90:    !$omp end target
-./test/offloading/fortran/target-region-implicit-array.f90:  !$omp target
-./test/offloading/fortran/target-region-implicit-array.f90:  !$omp end target
-./test/offloading/fortran/target-map-local-intrinisc-sized-param.f90:!$omp target map(tofrom: b)
-./test/offloading/fortran/target-map-local-intrinisc-sized-param.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array-v2.f90:!$omp target map(tofrom: alloca_dtype%array_j, alloca_dtype)
-./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array-v2.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-3d-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%array_j(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-dtype-3d-alloca-array-with-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-explicit-individual-array-member.f90:  !$omp target map(tofrom:scalar_arr%array_y)
-./test/offloading/fortran/target-map-dtype-explicit-individual-array-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-pointer-target-array-section-3d-bounds.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-pointer-target-array-section-3d-bounds.f90:!$omp end target
-./test/offloading/fortran/dump_map_tables.f90:!$omp target enter data map(to:A(:N))
-./test/offloading/fortran/dump_map_tables.f90:!$omp target parallel do
-./test/offloading/fortran/dump_map_tables.f90:!$omp target exit data map(from:A)
-./test/offloading/fortran/basic-target-region-3D-array-section.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k, j2, k2)
-./test/offloading/fortran/basic-target-region-3D-array-section.f90:!$omp end target
-./test/offloading/fortran/target-map-enter-exit-array.f90:   !$omp target enter data map(alloc: A)
-./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target
-./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target exit data map(from: A)
-./test/offloading/fortran/target-map-enter-exit-array.f90:    !$omp target exit data map(delete: A)
-./test/offloading/fortran/target-map-dtype-alloca-array-and-non-alloca-dtype.f90:!$omp target map(tofrom: one_l%nest, one_l%array_j)
-./test/offloading/fortran/target-map-dtype-alloca-array-and-non-alloca-dtype.f90:!$omp end target
-./test/offloading/fortran/target-map-two-dtype-explicit-member.f90:  !$omp target map(tofrom:scalar_arr1%break_1, scalar_arr2%break_3)
-./test/offloading/fortran/target-map-two-dtype-explicit-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-two-dtype-multi-member-array-1D-bounds.f90:  !$omp target map(tofrom:scalar_arr1%array_x(3:6), scalar_arr1%array_y(3:6), scalar_arr2%array_x(3:6), scalar_arr2%array_y(3:6))
-./test/offloading/fortran/target-map-two-dtype-multi-member-array-1D-bounds.f90:  !$omp end target
-./test/offloading/fortran/target-map-all-common-block-members.f90:  !$omp target map(tofrom: var1, var2, var3)
-./test/offloading/fortran/target-map-all-common-block-members.f90:  !$omp end target
-./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp target map(tofrom: var2, var1, var3)
-./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp end target
-./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp target map(tofrom: var2, var3, var1)
-./test/offloading/fortran/target-map-all-common-block-members.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-array-and-scalar.f90:!$omp target map(tofrom: alloca_dtype%nested_dtype%array_i, alloca_dtype%k)
-./test/offloading/fortran/target-map-alloca-dtype-array-and-scalar.f90:!$omp end target
-./test/offloading/fortran/target-map-second-common-block-member.f90:  !$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-second-common-block-member.f90:  !$omp end target
-./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp end target
-./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-second-common-block-member.f90:!$omp end target
-./test/offloading/fortran/double-target-call-with-declare-target.f90:    !$omp declare target link(sp)
-./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp target map(tofrom:sp) map(to: i, j)
-./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp end target
-./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp target map(tofrom:sp) map(to: i, j)
-./test/offloading/fortran/double-target-call-with-declare-target.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-3D-member-bounds.f90:  !$omp target map(tofrom:scalar_arr%array_x(1:3, 1:3, 2:2), scalar_arr%array_y(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-3D-member-bounds.f90:  !$omp end target
-./test/offloading/fortran/target-map-dtype-allocatable-scalar-and-array.f90:    !$omp target map(tofrom: one_l%array_j, one_l%j)
-./test/offloading/fortran/target-map-dtype-allocatable-scalar-and-array.f90:    !$omp end target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:    !$omp target enter data map(to: scalar_arr%array(3:6))
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:   !$omp target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:  !$omp end target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit.f90:  !$omp target exit data map(from: scalar_arr%array(3:6))
-./test/offloading/fortran/basic-target-region-3D-array.f90:!$omp target map(tofrom:x, counter) map(to: i, j, k, i2, j2, k2)
-./test/offloading/fortran/basic-target-region-3D-array.f90:!$omp end target
-./test/offloading/fortran/basic_target_region.f90:!$omp target map(from:x)
-./test/offloading/fortran/basic_target_region.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-multi-member.f90:!$omp target map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%array_i2)
-./test/offloading/fortran/target-map-nested-dtype-multi-member.f90:!$omp end target
-./test/offloading/fortran/constant-arr-index.f90:  !$omp target map(tofrom:sp)
-./test/offloading/fortran/constant-arr-index.f90:  !$omp end target
-./test/offloading/fortran/usm_map_close.f90:  !$omp requires unified_shared_memory
-./test/offloading/fortran/usm_map_close.f90:!$omp target data map(tofrom: a, device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp target map(tofrom: device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp end target
-./test/offloading/fortran/usm_map_close.f90:!$omp end target data
-./test/offloading/fortran/usm_map_close.f90:!$omp target data map(close, tofrom: a) map(tofrom: device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp target map(tofrom: device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp end target
-./test/offloading/fortran/usm_map_close.f90:!$omp end target data
-./test/offloading/fortran/usm_map_close.f90:!$omp target data map(tofrom: a) use_device_ptr(a)
-./test/offloading/fortran/usm_map_close.f90:!$omp end target data
-./test/offloading/fortran/usm_map_close.f90:!$omp target enter data map(close, to: a)
-./test/offloading/fortran/usm_map_close.f90:!$omp target map(from: device_alloc)
-./test/offloading/fortran/usm_map_close.f90:!$omp end target
-./test/offloading/fortran/usm_map_close.f90:!$omp target exit data map(from: a)
-./test/offloading/fortran/target-map-two-dtype-individual-member-array-1D-bounds.f90:  !$omp target map(tofrom:scalar_arr1%array_x(3:6), scalar_arr2%array_x(3:6))
-./test/offloading/fortran/target-map-two-dtype-individual-member-array-1D-bounds.f90:  !$omp end target
-./test/offloading/fortran/target_map_present_success.f90:!$omp target data map(tofrom:arr)
-./test/offloading/fortran/target_map_present_success.f90:!$omp target data map(present,alloc:arr)
-./test/offloading/fortran/target_map_present_success.f90:!$omp target
-./test/offloading/fortran/target_map_present_success.f90:!$omp end target
-./test/offloading/fortran/target_map_present_success.f90:!$omp end target data
-./test/offloading/fortran/target_map_present_success.f90:!$omp end target data
-./test/offloading/fortran/target-map-literal-write.f90:!$omp target
-./test/offloading/fortran/target-map-literal-write.f90:!$omp end target
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp target map(tofrom: top_dtype%array_i, top_dtype%nested%nest2%array_j, top_dtype%nested%nest%array_ptr) &
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp map(tofrom: top_dtype2%array_i, top_dtype2%nested%nest2%array_j, top_dtype2%nested%nest%array_ptr)
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-alloca-members.f90:!$omp end target
-./test/offloading/fortran/target-map-double-nested-dtype-double-array-bounds.f90:!$omp target map(tofrom: top_dtype%nested%array_i2(4:8), top_dtype2%nested%array_j2(4:8))
-./test/offloading/fortran/target-map-double-nested-dtype-double-array-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp target map(tofrom: top_dtype%nested%nest%j4, top_dtype%nested%nest%i4, top_dtype%nested%nest%k4) &
-./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp map(tofrom: top_dtype%array_i, top_dtype%nested%nest2%i3, top_dtype%nested%i2, top_dtype%nested%nest2%k3, top_dtype%nested%nest2%j3)
-./test/offloading/fortran/target-map-large-nested-dtype-multi-member.f90:!$omp end target
-./test/offloading/fortran/target_map_common_block2.f90:  !$omp target map(tofrom:var4)
-./test/offloading/fortran/target_map_common_block2.f90:  !$omp end target
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp target data map(to: A, B) map(alloc: C)
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp target map(from: C)
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp end target
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp target update from(C) ! updates C device -> host
-./test/offloading/fortran/target-nested-target-data.f90:   !$omp end target data
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-member-bounds.f90:  !$omp target map(tofrom:scalar_arr%array_x(3:6), scalar_arr%array_y(3:6))
-./test/offloading/fortran/target-map-dtype-multi-explicit-array-member-bounds.f90:  !$omp end target
-./test/offloading/fortran/target-map-allocatable-array-section-1d-bounds.f90:    !$omp target map(tofrom:sp_read(2:6)) map(tofrom:sp_write(2:6))
-./test/offloading/fortran/target-map-allocatable-array-section-1d-bounds.f90:    !$omp end target
-./test/offloading/fortran/target-map-dtype-allocatable-array.f90:    !$omp target map(tofrom: one_l%array_j)
-./test/offloading/fortran/target-map-dtype-allocatable-array.f90:    !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:    !$omp target enter data map(to: array(3:6))
-./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:   !$omp target
-./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:  !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array-bounds.f90:  !$omp target exit data map(from: array(3:6))
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target data map(tofrom: b(1:3)) use_device_addr(b)
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target has_device_addr(b(2)%x)
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target data
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target data map(tofrom: b(1:3)) use_device_addr(b)
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp target has_device_addr(b(2)%x)
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target
-./test/offloading/fortran/target-has-device-addr2.f90:  !$omp end target data
-./test/offloading/fortran/target-depend.f90:      !$omp parallel num_threads(3)
-./test/offloading/fortran/target-depend.f90:      !$omp single
-./test/offloading/fortran/target-depend.f90:      !$omp task depend(out: z) shared(z)
-./test/offloading/fortran/target-depend.f90:      !$omp end task
-./test/offloading/fortran/target-depend.f90:      !$omp target map(tofrom: z) depend(in:z)
-./test/offloading/fortran/target-depend.f90:      !$omp end target
-./test/offloading/fortran/target-depend.f90:      !$omp end single
-./test/offloading/fortran/target-depend.f90:      !$omp end parallel
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target enter data map(to: scalar_arr%array(3:6))
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target update to(scalar_arr%array(3:6))
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:   !$omp target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp end target
-./test/offloading/fortran/target-map-dtype-arr-bounds-member-enter-exit-update.f90:  !$omp target exit data map(from: scalar_arr%array(3:6))
-./test/offloading/fortran/target-map-common-block.f90:  !$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-common-block.f90:  !$omp end target
-./test/offloading/fortran/target-map-common-block.f90:!$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-common-block.f90:!$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-dtype-alloca-and-non-alloca-array.f90:!$omp target map(tofrom: one_l%array_i, one_l%array_j)
-./test/offloading/fortran/target-map-dtype-alloca-and-non-alloca-array.f90:!$omp end target
-./test/offloading/fortran/target-data-map-if-present.f90:       !$omp target data if(present(a)) map(alloc:a)
-./test/offloading/fortran/target-data-map-if-present.f90:       !$omp end target data
-./test/offloading/fortran/target-parallel-do-collapse.f90:   !$omp target parallel do map(from:array) collapse(2)
-./test/offloading/fortran/target-parallel-do-collapse.f90:    !$omp end target parallel do
-./test/offloading/fortran/target-map-nested-dtype-complex-member.f90:!$omp target map(tofrom: top_dtype%nested%i2, top_dtype%k, top_dtype%nested%j2, top_dtype%nested%array_i2, top_dtype%l)
-./test/offloading/fortran/target-map-nested-dtype-complex-member.f90:!$omp end target
-./test/offloading/fortran/target-map-derived-type-full-implicit-2.f90:  !$omp target
-./test/offloading/fortran/target-map-derived-type-full-implicit-2.f90:  !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array-2.f90:    !$omp target enter data map(to: array)
-./test/offloading/fortran/target-map-enter-exit-array-2.f90:   !$omp target
-./test/offloading/fortran/target-map-enter-exit-array-2.f90:  !$omp end target
-./test/offloading/fortran/target-map-enter-exit-array-2.f90:  !$omp target exit data map(from: array)
-./test/offloading/fortran/basic-target-parallel-do.f90:   !$omp target parallel do map(from: x)
-./test/offloading/fortran/basic-target-parallel-do.f90:   !$omp end target parallel do
-./test/offloading/fortran/target-map-dtype-multi-explicit-member.f90:    !$omp target map(from:scalar_struct%rx, scalar_struct%ry)
-./test/offloading/fortran/target-map-dtype-multi-explicit-member.f90:    !$omp end target
-./test/offloading/fortran/target-map-dynamic.f90:    !$omp target enter data map(to: A)
-./test/offloading/fortran/target-map-dynamic.f90:    !$omp target
-./test/offloading/fortran/target-map-dynamic.f90:    !$omp end target
-./test/offloading/fortran/target-map-dynamic.f90:    !$omp target exit data map(from: A)
-./test/offloading/fortran/target_map_common_block1.f90:  !$omp target map(tofrom:devices) map(tofrom:var1)
-./test/offloading/fortran/target_map_common_block1.f90:  !$omp end target
-./test/offloading/fortran/target-with-threadprivate.f90:!$omp threadprivate(pointer2)
-./test/offloading/fortran/target-with-threadprivate.f90:!$omp target
-./test/offloading/fortran/target-with-threadprivate.f90:!$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:devices)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var1)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var2)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom:var3)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(to:var4) map(from:tmp)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target_map_common_block.f90:  !$omp target map(tofrom: var6)
-./test/offloading/fortran/target_map_common_block.f90:  !$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array.f90:!$omp target map(tofrom: alloca_dtype, alloca_dtype%array_j)
-./test/offloading/fortran/target-map-alloca-dtype-and-alloca-array.f90:!$omp end target
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp target map(from: top_dtype%nested%nest%j4, top_dtype%nested%nest%i4, top_dtype%nested%nest%k4) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(from: top_dtype%array_i, top_dtype%nested%nest2%i3, top_dtype%nested%i2) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(from: top_dtype%nested%nest2%k3, top_dtype%nested%nest2%j3) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%nested%nest%j4, top_dtype2%nested%nest%i4, top_dtype2%nested%nest%k4) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%array_i, top_dtype2%nested%nest2%i3, top_dtype2%nested%i2) &
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp map(to: top_dtype2%nested%nest2%k3, top_dtype2%nested%nest2%j3)
-./test/offloading/fortran/target-map-double-large-nested-dtype-multi-member.f90:!$omp end target
-./test/offloading/fortran/target-map-derived-type-full-implicit-1.f90:  !$omp target map(from:out)
-./test/offloading/fortran/target-map-derived-type-full-implicit-1.f90:  !$omp end target
-./test/offloading/fortran/target-map-dtype-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%array_j(2:6))
-./test/offloading/fortran/target-map-dtype-alloca-array-with-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-single-member.f90:!$omp target map(tofrom: top_dtype%nested%array_i2)
-./test/offloading/fortran/target-map-nested-dtype-single-member.f90:!$omp end target
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:    !$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:    !$omp end target
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp end target
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp target map(tofrom: copy)
-./test/offloading/fortran/target-map-mix-imp-exp-common-block-members.f90:  !$omp end target
-./test/offloading/fortran/target-map-nested-dtype-derived-member.f90:!$omp target map(tofrom: top_dtype%k, top_dtype%nested2%array_i2, top_dtype%nested)
-./test/offloading/fortran/target-map-nested-dtype-derived-member.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-alloca-dtype-alloca-array-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(2:6))
-./test/offloading/fortran/target-map-nested-alloca-dtype-alloca-array-bounds.f90:!$omp end target
-./test/offloading/fortran/basic-target-region-1D-array-section.f90:    !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(to:i,j)
-./test/offloading/fortran/basic-target-region-1D-array-section.f90:    !$omp end target
-./test/offloading/fortran/target-map-two-nested-dtype-member-array-map.f90:  !$omp target map(tofrom:scalar_arr1%nested%array_z(3:6), scalar_arr1%nested%array_ix(3:6), scalar_arr2%nested%array_z(3:6), scalar_arr2%nested%array_ix(3:6))
-./test/offloading/fortran/target-map-two-nested-dtype-member-array-map.f90:  !$omp end target
-./test/offloading/fortran/target-custom-mapper.f90:   !$omp declare mapper(my_mapper1 : mytype :: t) map(to: t%data(1 : n))
-./test/offloading/fortran/target-custom-mapper.f90:   !$omp declare mapper(my_mapper2 : mytype2 :: t) map(mapper(my_mapper1): t%my_data)
-./test/offloading/fortran/target-custom-mapper.f90:   !$omp target map(tofrom: sum_device) map(mapper(my_mapper2) : obj)
-./test/offloading/fortran/target-custom-mapper.f90:   !$omp end target
-./test/offloading/fortran/target-map-nested-dtype-3d-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(1:3, 1:3, 2:2)) map(to: inArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-nested-dtype-3d-alloca-array-with-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-allocatable-array-section-3d-bounds.f90:!$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3))
-./test/offloading/fortran/target-map-allocatable-array-section-3d-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-alloca-array-and-non-alloca-dtype.f90:    !$omp target map(tofrom: one_l%nest%nest2, one_l%nest%array_k)
-./test/offloading/fortran/target-map-nested-dtype-alloca-array-and-non-alloca-dtype.f90:    !$omp end target
-./test/offloading/fortran/target-map-individual-dtype-member-map.f90:  !$omp target map(from:scalar_struct%rx)
-./test/offloading/fortran/target-map-individual-dtype-member-map.f90:  !$omp end target
-./test/offloading/fortran/target-map-allocatable-dtype.f90:!$omp target map(tofrom: alloca_dtype)
-./test/offloading/fortran/target-map-allocatable-dtype.f90:!$omp end target
-./test/offloading/fortran/target-map-nested-dtype-alloca-array-with-bounds.f90:!$omp target map(tofrom: one_l%nest%array_k(2:6))
-./test/offloading/fortran/target-map-nested-dtype-alloca-array-with-bounds.f90:!$omp end target
-./test/offloading/fortran/implicit-record-field-mapping.f90:  !$omp target map(from: dst_sum)
-./test/offloading/fortran/implicit-record-field-mapping.f90:  !$omp end target
-./test/offloading/fortran/target-has-device-addr3.f90:  !$omp target data map(tofrom: x) use_device_addr(x)
-./test/offloading/fortran/target-has-device-addr3.f90:  !$omp target has_device_addr(x) map(tofrom: y)
-./test/offloading/fortran/target-has-device-addr3.f90:  !$omp end target
-./test/offloading/fortran/target-has-device-addr3.f90:  !$omp end target data
-./test/offloading/fortran/target-map-double-nested-dtype-array-bounds.f90:!$omp target map(tofrom: top_dtype%nested%array_i2(4:8), top_dtype2%nested%array_j2(4:8))
-./test/offloading/fortran/target-map-double-nested-dtype-array-bounds.f90:!$omp end target
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: arg_alloc)
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: local_alloc)
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp target map(tofrom: map_ptr)
-./test/offloading/fortran/target-map-allocatable-map-scopes.f90:  !$omp end target
-./test/offloading/fortran/target-map-enter-exit-scalar.f90:    !$omp target enter data map(to: scalar)
-./test/offloading/fortran/target-map-enter-exit-scalar.f90:   !$omp target
-./test/offloading/fortran/target-map-enter-exit-scalar.f90:   !$omp end target
-./test/offloading/fortran/target-map-enter-exit-scalar.f90:  !$omp target exit data map(from: scalar)
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp target map(tofrom: top_dtype%nested%nest%i4, top_dtype%nested%array_j2) &
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp map(tofrom: top_dtype%nested%nest%k4, top_dtype%array_i, top_dtype%nested%nest2%i3) &
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp map(tofrom: top_dtype%nested%i2, top_dtype%nested%nest2%j3, top_dtype%array_j)
-./test/offloading/fortran/target-map-multi-alloca-dtypes-with-multi-mixed-members.f90:!$omp end target
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target enter data map(alloc:       &
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp  chunk%tiles(2)%field%density0)
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp end target
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp target exit data map(from:         &
-./test/offloading/fortran/explicit-and-implicit-record-field-mapping.f90:  !$omp  chunk%tiles(2)%field%density0)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%test)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%test, alloca_dtype%vertexes(N2)%test)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%test_tile(N1)%field%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%test_tile(N1)%field%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom:  alloca_dtype%test_tile(N1)%field%test, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N2)%field%test, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N1)%field%vertexy, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                     alloca_dtype%test_tile(N2)%field%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom:  alloca_dtype%test_tile(N2)%field%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N1)%vertexy, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype%vertexes(N1)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N1)%vertexy, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(4)%vertexy, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(4)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexx, &
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp                    alloca_dtype%vertexes(N2)%vertexy)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp target map(tofrom: alloca_dtype_arr(N2)%array_i)
-./test/offloading/fortran/dtype-member-map-syntax-2.f90:!$omp end target
-./test/offloading/fortran/target_update.f90:!$omp target enter data map(to:x, device_id)
-./test/offloading/fortran/target_update.f90:!$omp target
-./test/offloading/fortran/target_update.f90:!$omp end target
-./test/offloading/fortran/target_update.f90:!$omp target
-./test/offloading/fortran/target_update.f90:!$omp end target
-./test/offloading/fortran/target_update.f90:!$omp target update from(x, device_id)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:    !$omp declare target link(/var_common/)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:  !$omp target map(tofrom: var2)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:  !$omp end target
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: copy)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp target map(tofrom: /var_common/)
-./test/offloading/fortran/target-map-declare-target-link-common-block.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-alloca-array.f90:!$omp target map(tofrom: alloca_dtype%array_j)
-./test/offloading/fortran/target-map-alloca-dtype-alloca-array.f90:!$omp end target
-./test/offloading/fortran/target-map-alloca-dtype-array-of-dtype.f90:!$omp target map(tofrom: dtyped%array_dtype)
-./test/offloading/fortran/target-map-alloca-dtype-array-of-dtype.f90:!$omp end target
-./test/Inputs/basic_array.f90:    !$omp declare target

>From 2ae5772a8165c7d54d2a6b52666a80812375b6c0 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Tue, 15 Apr 2025 16:22:12 -0500
Subject: [PATCH 5/6] New changes to dynamically allocate buffer

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 26 ++---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 98 +++++++++++--------
 .../Frontend/OpenMPIRBuilderTest.cpp          |  3 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  4 +-
 4 files changed, 72 insertions(+), 59 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 008731aa41b23..6ee1181db589e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -503,6 +503,13 @@ class OpenMPIRBuilder {
       return allocaInst;
     }
   };
+  
+  /// Type used throughout for insertion points.
+  using InsertPointTy = IRBuilder<>::InsertPoint;
+
+  /// Type used to represent an insertion point or an error value.
+  using InsertPointOrErrorTy = Expected<InsertPointTy>;
+
   struct ScanInformation {
   public:
     llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
@@ -510,6 +517,9 @@ class OpenMPIRBuilder {
     llvm::BasicBlock *OMPScanExitBlock = nullptr;
     llvm::BasicBlock *OMPScanDispatch = nullptr;
     llvm::BasicBlock *OMPScanLoopExit = nullptr;
+    llvm::BasicBlock *OMPScanInit = nullptr;
+    llvm::BasicBlock *OMPScanFinish = nullptr;
+    InsertPointTy FinalizeIP;
     bool OMPFirstScanLoop = false;
     llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ReductionVarToScanBuffs;
     llvm::Value *IV;
@@ -532,12 +542,6 @@ class OpenMPIRBuilder {
   /// Add attributes known for \p FnID to \p Fn.
   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
 
-  /// Type used throughout for insertion points.
-  using InsertPointTy = IRBuilder<>::InsertPoint;
-
-  /// Type used to represent an insertion point or an error value.
-  using InsertPointOrErrorTy = Expected<InsertPointTy>;
-
   /// Get the create a name using the platform specific separators.
   /// \param Parts parts of the final name that needs separation
   /// The created name has a first separator between the first and second part
@@ -1600,11 +1604,12 @@ class OpenMPIRBuilder {
   /// Creates the basic blocks required for scan reduction.
   void createScanBBs();
 
-  /// Creates the buffer needed for scan reduction.
+  /// Dynamically allocates the buffer needed for scan reduction.
+  /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to be declared. 
   /// \param ScanVars Scan Variables.
   ///
   /// \return error if any produced, else return success.
-  Error emitScanBasedDirectiveDeclsIR(ArrayRef<llvm::Value *> ScanVars,
+  Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP, ArrayRef<llvm::Value *> ScanVars,
                                      ArrayRef<llvm::Type *> ScanVarsType);
 
   /// Copies the result back to the reduction variable.
@@ -2700,14 +2705,11 @@ class OpenMPIRBuilder {
   ///      buffer[cnt] op= buffer[cnt-i];
   ///  }
   /// \param Loc The insert and source location description.
-  /// \param FinalizeIP The IP where the reduction result needs
-  //                   to be copied back to original variable.
   /// \param ReductionInfos Array type containing the ReductionOps.
   ///
   /// \returns The insertion position *after* the masked.
   InsertPointOrErrorTy emitScanReduction(
-      const LocationDescription &Loc, InsertPointTy &FinalizeIP,
-      SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+      const LocationDescription &Loc, SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
 
   /// This directive split and directs the control flow to input phase
   ///  blocks or scan phase blocks based on 1. whether input loop or scan loop
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 4f63030a2a269..e212b71383667 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4005,8 +4005,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
     ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
     bool IsInclusive) {
   if (ScanInfo.OMPFirstScanLoop) {
-    Builder.restoreIP(AllocaIP);
-    llvm::Error Err = emitScanBasedDirectiveDeclsIR(ScanVars, ScanVarsType);
+    llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars, ScanVarsType);
     if (Err) {
       return Err;
     }
@@ -4014,35 +4013,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
   if (!updateToLocation(Loc))
     return Loc.IP;
   
-  // Allocate temporary buffer by master thread 
-  auto BodyGenCB = [&](InsertPointTy AllocaIP,
-                       InsertPointTy CodeGenIP) -> Error {
-
-    Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
-    for (int i = 0; i < ScanVars.size(); i++) {
-      Type* IntPtrTy = Builder.getInt32Ty();
-      Constant* allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]);
-      allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy);
-      llvm::Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], allocsize,AllocSpan,nullptr, "arr");
-      Builder.CreateStore(Buff, ScanInfo.ReductionVarToScanBuffs[ScanVars[i]]);
-    }
-    return Error::success();
-  };
-  // TODO: Perform finalization actions for variables. This has to be
-  // called for variables which have destructors/finalizers.
-  auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
-
-  llvm::Value *FilterVal = Builder.getInt32(0);
-  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
-      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
-
-  if (!AfterIP)
-    return AfterIP.takeError();
-  Builder.restoreIP(*AfterIP);
-  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
-  if (!AfterIP)
-    return AfterIP.takeError();
-
   unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
   llvm::Value *IV = ScanInfo.IV;
 
@@ -4104,12 +4074,49 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
 }
 
 Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
-    ArrayRef<Value *> ScanVars, ArrayRef<Type *> ScanVarsType) {
- 
+    InsertPointTy AllocaIP, ArrayRef<Value *> ScanVars, ArrayRef<Type *> ScanVarsType) {
+  
+  Builder.restoreIP(AllocaIP);
+  // Create the shared pointer at alloca IP.
   for(int i=0; i<ScanVars.size(); i++){
    llvm::Value *Buff = Builder.CreateAlloca(Builder.getPtrTy(),1); 
    ScanInfo.ReductionVarToScanBuffs[ScanVars[i]] = Buff;
   }
+
+  // Allocate temporary buffer by master thread 
+  auto BodyGenCB = [&](InsertPointTy AllocaIP,
+                       InsertPointTy CodeGenIP) -> Error {
+    Builder.restoreIP(CodeGenIP);
+    Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
+    for (int i = 0; i < ScanVars.size(); i++) {
+      Type* IntPtrTy = Builder.getInt32Ty();
+      Constant* allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]);
+      allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy);
+      llvm::Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], allocsize,AllocSpan,nullptr, "arr");
+      Builder.CreateStore(Buff, ScanInfo.ReductionVarToScanBuffs[ScanVars[i]]);
+    }
+    return Error::success();
+  };
+  // TODO: Perform finalization actions for variables. This has to be
+  // called for variables which have destructors/finalizers.
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+  Builder.SetInsertPoint(ScanInfo.OMPScanInit->getTerminator());
+  llvm::Value *FilterVal = Builder.getInt32(0);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
+  BasicBlock *InputBB = Builder.GetInsertBlock();
+  if(InputBB->getTerminator())
+    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
+  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
+
   return Error::success();
 }
 
@@ -4117,6 +4124,7 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
     SmallVector<ReductionInfo> ReductionInfos) {
   auto BodyGenCB = [&](InsertPointTy AllocaIP,
                        InsertPointTy CodeGenIP) -> Error {
+    Builder.restoreIP(CodeGenIP);
     unsigned int DefaultAS = M.getDataLayout().getProgramAddressSpace();
     for (ReductionInfo RedInfo : ReductionInfos) {
       Value *PrivateVar = RedInfo.PrivateVariable;
@@ -4131,7 +4139,7 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
           OrigVar, SrcTy->getPointerTo(DefaultAS));
 
       Builder.CreateStore(Src, Dest);
-      Builder.CreateFree(Buff);
+      //Builder.CreateFree(Buff);
     }
     return Error::success();
   };
@@ -4139,6 +4147,7 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
   // called for variables which have destructors/finalizers.
   auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
 
+  Builder.SetInsertPoint(ScanInfo.OMPScanFinish->getTerminator());
   llvm::Value *FilterVal = Builder.getInt32(0);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
@@ -4146,15 +4155,18 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
   if (!AfterIP)
     return AfterIP.takeError();
   Builder.restoreIP(*AfterIP);
-  //AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
-  //if (!AfterIP)
-  //  return AfterIP.takeError();
+  BasicBlock *InputBB = Builder.GetInsertBlock();
+  if(InputBB->getTerminator())
+    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
+  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
   return Error::success();
 }
 
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
-    const LocationDescription &Loc, InsertPointTy &FinalizeIP,
-    SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
+    const LocationDescription &Loc, SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
 
   if (!updateToLocation(Loc))
     return Loc.IP;
@@ -4261,12 +4273,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
   if (!AfterIP)
     return AfterIP.takeError();
   Builder.restoreIP(*AfterIP);
-  Builder.restoreIP(FinalizeIP);
   Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos);
   if (Err) {
     return Err;
   }
-  FinalizeIP = Builder.saveIP();
 
   return AfterIP;
 }
@@ -4423,6 +4433,8 @@ OpenMPIRBuilder::createCanonicalScanLoops(
   Value *TripCount = calculateCanonicalLoopTripCount(
       ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
   ScanInfo.Span = TripCount;
+  ScanInfo.OMPScanInit = splitBB(Builder, true, "scan.init");
+  Builder.SetInsertPoint(ScanInfo.OMPScanInit);
 
   auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
     /// The control of the loopbody of following structure:
@@ -4449,6 +4461,7 @@ OpenMPIRBuilder::createCanonicalScanLoops(
     /// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
     /// on 1.whether it is in Input phase or Scan Phase , 2. whether it is an
     /// exclusive or inclusive scan.
+    Builder.restoreIP(CodeGenIP);
     ScanInfo.IV = IV;
     createScanBBs();
     BasicBlock *InputBlock = Builder.GetInsertBlock();
@@ -4468,7 +4481,7 @@ OpenMPIRBuilder::createCanonicalScanLoops(
   SmallVector<llvm::CanonicalLoopInfo *> Result;
   const auto &&InputLoopGen = [&]() -> Error {
     auto LoopInfo =
-        createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
+        createCanonicalLoop(Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned,
                             InclusiveStop, ComputeIP, Name, true);
     if (!LoopInfo)
       return LoopInfo.takeError();
@@ -4484,6 +4497,7 @@ OpenMPIRBuilder::createCanonicalScanLoops(
       return LoopInfo.takeError();
     Result.push_back(*LoopInfo);
     Builder.restoreIP((*LoopInfo)->getAfterIP());
+    ScanInfo.OMPScanFinish= Builder.GetInsertBlock();
     return Error::success();
   };
   Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen);
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 4fb3936df653a..f34da1dd26df0 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5390,12 +5390,11 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
       {Builder.getFloatTy(), origVar, scanVar,
        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
        /*ReductionGenClang=*/nullptr, sumAtomicReduction}};
-  auto FinalizeIP = ScanLoop->getAfterIP();
   OpenMPIRBuilder::LocationDescription RedLoc({InputLoop->getAfterIP(), DL});
   llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
   ASSERT_EXPECTED_INIT(
       InsertPointTy, retIp,
-      OMPBuilder.emitScanReduction(RedLoc, FinalizeIP, reductionInfos));
+      OMPBuilder.emitScanReduction(RedLoc, reductionInfos));
   Builder.restoreIP(retIp);
   Builder.CreateBr(Cont);
   SmallVector<CallInst *> MaskedCalls;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 03636f86215d7..c84789e634101 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2363,7 +2363,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   SmallVector<llvm::CanonicalLoopInfo *> loopInfos =
       findCurrentLoopInfos(moduleTranslation);
   auto inputLoopFinishIp = loopInfos.front()->getAfterIP();
-  auto scanLoopFinishIp = loopInfos.back()->getAfterIP();
   bool isInScanRegion =
       wsloopOp.getReductionMod() && (wsloopOp.getReductionMod().value() ==
                                      mlir::omp::ReductionModifier::inscan);
@@ -2377,8 +2376,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                          privateReductionVariables, reductionInfos);
     llvm::BasicBlock *cont = splitBB(builder, false, "omp.scan.loop.cont");
     llvm::OpenMPIRBuilder::InsertPointOrErrorTy redIP =
-        ompBuilder->emitScanReduction(builder.saveIP(), scanLoopFinishIp,
-                                      reductionInfos);
+        ompBuilder->emitScanReduction(builder.saveIP(), reductionInfos);
     if (failed(handleError(redIP, opInst)))
       return failure();
 

>From 0d343a611394b815af74779388a2f86ccc176104 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Wed, 16 Apr 2025 14:55:24 -0500
Subject: [PATCH 6/6] New changes

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 28 ++++--
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 98 ++++++++-----------
 .../Frontend/OpenMPIRBuilderTest.cpp          |  5 +-
 3 files changed, 63 insertions(+), 68 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 6ee1181db589e..386de1fa51f18 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -503,7 +503,7 @@ class OpenMPIRBuilder {
       return allocaInst;
     }
   };
-  
+
   /// Type used throughout for insertion points.
   using InsertPointTy = IRBuilder<>::InsertPoint;
 
@@ -512,16 +512,24 @@ class OpenMPIRBuilder {
 
   struct ScanInformation {
   public:
+    /// Dominates the body of the loop before scan directive
     llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+    /// Dominates the body of the loop after the scan directive
     llvm::BasicBlock *OMPAfterScanBlock = nullptr;
-    llvm::BasicBlock *OMPScanExitBlock = nullptr;
+    /// Controls the flow to before or after scan blocks
     llvm::BasicBlock *OMPScanDispatch = nullptr;
+    /// Exit block of loop body
     llvm::BasicBlock *OMPScanLoopExit = nullptr;
+    /// Block before loop body where scan initializations are done
     llvm::BasicBlock *OMPScanInit = nullptr;
+    /// Block after loop body where scan finalizations are done
     llvm::BasicBlock *OMPScanFinish = nullptr;
-    InsertPointTy FinalizeIP;
+    /// If true, it indicates that the Input phase is being lowered; else it
+    /// indicates that the Scan phase is being lowered
     bool OMPFirstScanLoop = false;
-    llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ReductionVarToScanBuffs;
+    /// Maps the private reduction variable to the pointer of the temporary
+    /// buffer
+    llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ScanBuffPtrs;
     llvm::Value *IV;
     llvm::Value *Span;
   } ScanInfo;
@@ -1605,12 +1613,13 @@ class OpenMPIRBuilder {
   void createScanBBs();
 
   /// Dynamically allocates the buffer needed for scan reduction.
-  /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to be declared. 
-  /// \param ScanVars Scan Variables.
+  /// \param AllocaIP The IP where the shared pointer to the buffer is declared.
+  /// \param ScanVars Scan Variables.
   ///
   /// \return error if any produced, else return success.
-  Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP, ArrayRef<llvm::Value *> ScanVars,
-                                     ArrayRef<llvm::Type *> ScanVarsType);
+  Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP,
+                                      ArrayRef<llvm::Value *> ScanVars,
+                                      ArrayRef<llvm::Type *> ScanVarsType);
 
   /// Copies the result back to the reduction variable.
   /// \param ReductionInfos Array type containing the ReductionOps.
@@ -2709,7 +2718,8 @@ class OpenMPIRBuilder {
   ///
   /// \returns The insertion position *after* the masked.
   InsertPointOrErrorTy emitScanReduction(
-      const LocationDescription &Loc, SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+      const LocationDescription &Loc,
+      SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
 
   /// This directive split and directs the control flow to input phase
   ///  blocks or scan phase blocks based on 1. whether input loop or scan loop
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index e212b71383667..23a548a1a60d0 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -60,6 +60,7 @@
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <optional>
 
@@ -4005,57 +4006,44 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
     ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
     bool IsInclusive) {
   if (ScanInfo.OMPFirstScanLoop) {
-    llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars, ScanVarsType);
+    llvm::Error Err =
+        emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars, ScanVarsType);
     if (Err) {
       return Err;
     }
   }
   if (!updateToLocation(Loc))
     return Loc.IP;
-  
-  unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
+
   llvm::Value *IV = ScanInfo.IV;
 
   if (ScanInfo.OMPFirstScanLoop) {
     // Emit buffer[i] = red; at the end of the input phase.
-    for (int i = 0; i < ScanVars.size(); i++) {
-      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVars[i]];
+    for (size_t i = 0; i < ScanVars.size(); i++) {
+      Value *BuffPtr = ScanInfo.ScanBuffPtrs[ScanVars[i]];
+      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
       Type *DestTy = ScanVarsType[i];
       Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
       Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
-      Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-          Val, DestTy->getPointerTo(defaultAS));
 
-      Builder.CreateStore(Src, Dest);
+      Builder.CreateStore(Src, Val);
     }
   }
   Builder.CreateBr(ScanInfo.OMPScanLoopExit);
   emitBlock(ScanInfo.OMPScanDispatch, Builder.GetInsertBlock()->getParent());
 
-  // Initialize the private reduction variable to 0 in each iteration.
-  // It is used to copy intial values to scan buffer.
-  ConstantInt *Zero = ConstantInt::get(Builder.getInt32Ty(), 0);
-  for (int i = 0; i < ScanVars.size(); i++) {
-    Type *DestTy = ScanVarsType[i];
-    Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-        ScanVars[i], DestTy->getPointerTo(defaultAS));
-    Builder.CreateStore(Zero, Dest);
-  }
-
   if (!ScanInfo.OMPFirstScanLoop) {
     IV = ScanInfo.IV;
     // Emit red = buffer[i]; at the entrance to the scan phase.
     // TODO: if exclusive scan, the red = buffer[i-1] needs to be updated.
-    for (int i = 0; i < ScanVars.size(); i++) {
-      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ScanVars[i]];
+    for (size_t i = 0; i < ScanVars.size(); i++) {
+      Value *BuffPtr = ScanInfo.ScanBuffPtrs[ScanVars[i]];
+      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
       Type *DestTy = ScanVarsType[i];
       Value *SrcPtr =
           Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
       Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
-      Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-          ScanVars[i], DestTy->getPointerTo(defaultAS));
-
-      Builder.CreateStore(Src, Dest);
+      Builder.CreateStore(Src, ScanVars[i]);
     }
   }
 
@@ -4074,26 +4062,29 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
 }
 
 Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
-    InsertPointTy AllocaIP, ArrayRef<Value *> ScanVars, ArrayRef<Type *> ScanVarsType) {
-  
+    InsertPointTy AllocaIP, ArrayRef<Value *> ScanVars,
+    ArrayRef<Type *> ScanVarsType) {
+
   Builder.restoreIP(AllocaIP);
   // Create the shared pointer at alloca IP.
-  for(int i=0; i<ScanVars.size(); i++){
-   llvm::Value *Buff = Builder.CreateAlloca(Builder.getPtrTy(),1); 
-   ScanInfo.ReductionVarToScanBuffs[ScanVars[i]] = Buff;
+  for (size_t i = 0; i < ScanVars.size(); i++) {
+    llvm::Value *BuffPtr =
+        Builder.CreateAlloca(Builder.getPtrTy(), nullptr, "vla");
+    ScanInfo.ScanBuffPtrs[ScanVars[i]] = BuffPtr;
   }
 
-  // Allocate temporary buffer by master thread 
+  // Allocate temporary buffer by master thread
   auto BodyGenCB = [&](InsertPointTy AllocaIP,
                        InsertPointTy CodeGenIP) -> Error {
     Builder.restoreIP(CodeGenIP);
     Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
-    for (int i = 0; i < ScanVars.size(); i++) {
-      Type* IntPtrTy = Builder.getInt32Ty();
-      Constant* allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]);
-      allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy);
-      llvm::Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], allocsize,AllocSpan,nullptr, "arr");
-      Builder.CreateStore(Buff, ScanInfo.ReductionVarToScanBuffs[ScanVars[i]]);
+    for (size_t i = 0; i < ScanVars.size(); i++) {
+      Type *IntPtrTy = Builder.getInt32Ty();
+      Constant *Allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]);
+      Allocsize = ConstantExpr::getTruncOrBitCast(Allocsize, IntPtrTy);
+      Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
+                                         AllocSpan, nullptr, "arr");
+      Builder.CreateStore(Buff, ScanInfo.ScanBuffPtrs[ScanVars[i]]);
     }
     return Error::success();
   };
@@ -4110,7 +4101,7 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
     return AfterIP.takeError();
   Builder.restoreIP(*AfterIP);
   BasicBlock *InputBB = Builder.GetInsertBlock();
-  if(InputBB->getTerminator())
+  if (InputBB->getTerminator())
     Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
   AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
   if (!AfterIP)
@@ -4125,21 +4116,19 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
   auto BodyGenCB = [&](InsertPointTy AllocaIP,
                        InsertPointTy CodeGenIP) -> Error {
     Builder.restoreIP(CodeGenIP);
-    unsigned int DefaultAS = M.getDataLayout().getProgramAddressSpace();
     for (ReductionInfo RedInfo : ReductionInfos) {
       Value *PrivateVar = RedInfo.PrivateVariable;
       Value *OrigVar = RedInfo.Variable;
-      Value *Buff = ScanInfo.ReductionVarToScanBuffs[PrivateVar];
+      Value *BuffPtr = ScanInfo.ScanBuffPtrs[PrivateVar];
+      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
 
       Type *SrcTy = RedInfo.ElementType;
       Value *Val =
           Builder.CreateInBoundsGEP(SrcTy, Buff, ScanInfo.Span, "arrayOffset");
       Value *Src = Builder.CreateLoad(SrcTy, Val);
-      Value *Dest = Builder.CreatePointerBitCastOrAddrSpaceCast(
-          OrigVar, SrcTy->getPointerTo(DefaultAS));
 
-      Builder.CreateStore(Src, Dest);
-      //Builder.CreateFree(Buff);
+      Builder.CreateStore(Src, OrigVar);
+      Builder.CreateFree(Buff);
     }
     return Error::success();
   };
@@ -4156,7 +4145,7 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
     return AfterIP.takeError();
   Builder.restoreIP(*AfterIP);
   BasicBlock *InputBB = Builder.GetInsertBlock();
-  if(InputBB->getTerminator())
+  if (InputBB->getTerminator())
     Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
   AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
   if (!AfterIP)
@@ -4166,14 +4155,15 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
 }
 
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
-    const LocationDescription &Loc, SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
+    const LocationDescription &Loc,
+    SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
 
   if (!updateToLocation(Loc))
     return Loc.IP;
   auto BodyGenCB = [&](InsertPointTy AllocaIP,
                        InsertPointTy CodeGenIP) -> Error {
     Builder.restoreIP(CodeGenIP);
-    auto CurFn = Builder.GetInsertBlock()->getParent();
+    Function *CurFn = Builder.GetInsertBlock()->getParent();
     // for (int k = 0; k <= ceil(log2(n)); ++k)
     llvm::BasicBlock *LoopBB =
         BasicBlock::Create(CurFn->getContext(), "omp.outer.log.scan.body");
@@ -4217,10 +4207,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
     Builder.SetInsertPoint(InnerLoopBB);
     auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
     IVal->addIncoming(NMin1, LoopBB);
-    unsigned int defaultAS = M.getDataLayout().getProgramAddressSpace();
     for (ReductionInfo RedInfo : ReductionInfos) {
       Value *ReductionVal = RedInfo.PrivateVariable;
-      Value *Buff = ScanInfo.ReductionVarToScanBuffs[ReductionVal];
+      Value *BuffPtr = ScanInfo.ScanBuffPtrs[ReductionVal];
+      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
       Type *DestTy = RedInfo.ElementType;
       Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
       Value *LHSPtr =
@@ -4230,14 +4220,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
           Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
       Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
       Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
-      Value *LHSAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
-          LHSPtr, RHS->getType()->getPointerTo(defaultAS));
       llvm::Value *Result;
       InsertPointOrErrorTy AfterIP =
           RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
       if (!AfterIP)
         return AfterIP.takeError();
-      Builder.CreateStore(Result, LHSAddr);
+      Builder.CreateStore(Result, LHSPtr);
     }
     llvm::Value *NextIVal = Builder.CreateNUWSub(
         IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
@@ -4312,8 +4300,6 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
 
 void OpenMPIRBuilder::createScanBBs() {
   Function *Fun = Builder.GetInsertBlock()->getParent();
-  ScanInfo.OMPScanExitBlock =
-      BasicBlock::Create(Fun->getContext(), "omp.exit.inscan.bb");
   ScanInfo.OMPScanDispatch =
       BasicBlock::Create(Fun->getContext(), "omp.inscan.dispatch");
   ScanInfo.OMPAfterScanBlock =
@@ -4481,8 +4467,8 @@ OpenMPIRBuilder::createCanonicalScanLoops(
   SmallVector<llvm::CanonicalLoopInfo *> Result;
   const auto &&InputLoopGen = [&]() -> Error {
     auto LoopInfo =
-        createCanonicalLoop(Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned,
-                            InclusiveStop, ComputeIP, Name, true);
+        createCanonicalLoop(Builder.saveIP(), BodyGen, Start, Stop, Step,
+                            IsSigned, InclusiveStop, ComputeIP, Name, true);
     if (!LoopInfo)
       return LoopInfo.takeError();
     Result.push_back(*LoopInfo);
@@ -4497,7 +4483,7 @@ OpenMPIRBuilder::createCanonicalScanLoops(
       return LoopInfo.takeError();
     Result.push_back(*LoopInfo);
     Builder.restoreIP((*LoopInfo)->getAfterIP());
-    ScanInfo.OMPScanFinish= Builder.GetInsertBlock();
+    ScanInfo.OMPScanFinish = Builder.GetInsertBlock();
     return Error::success();
   };
   Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen);
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index f34da1dd26df0..4cc312c1b0f1c 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5392,9 +5392,8 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
        /*ReductionGenClang=*/nullptr, sumAtomicReduction}};
   OpenMPIRBuilder::LocationDescription RedLoc({InputLoop->getAfterIP(), DL});
   llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
-  ASSERT_EXPECTED_INIT(
-      InsertPointTy, retIp,
-      OMPBuilder.emitScanReduction(RedLoc, reductionInfos));
+  ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
+                       OMPBuilder.emitScanReduction(RedLoc, reductionInfos));
   Builder.restoreIP(retIp);
   Builder.CreateBr(Cont);
   SmallVector<CallInst *> MaskedCalls;



More information about the Mlir-commits mailing list