[llvm] [OpenMP] [IR Builder] Changes to Support Scan Operation (PR #136035)
Anchu Rajendran S via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 12:16:44 PDT 2025
https://github.com/anchuraj updated https://github.com/llvm/llvm-project/pull/136035
>From 8e0683c8e3172a3ecc8604312e55939c00ea0715 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Wed, 16 Apr 2025 16:03:02 -0500
Subject: [PATCH 1/4] IR Builder Changes to Support Scan Operation
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 138 +++++-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 423 +++++++++++++++++-
.../Frontend/OpenMPIRBuilderTest.cpp | 95 ++++
3 files changed, 651 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 206ad4a4ef85f..d0243e80b4f21 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -511,6 +511,30 @@ class OpenMPIRBuilder {
return allocaInst;
}
};
+
+ struct ScanInformation {
+ /// Dominates the body of the loop before scan directive
+ llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+ /// Dominates the body of the loop after scan directive
+ llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+ /// Controls the flow to before or after scan blocks
+ llvm::BasicBlock *OMPScanDispatch = nullptr;
+ /// Exit block of loop body
+ llvm::BasicBlock *OMPScanLoopExit = nullptr;
+ /// Block before loop body where scan initializations are done
+ llvm::BasicBlock *OMPScanInit = nullptr;
+ /// Block after loop body where scan finalizations are done
+ llvm::BasicBlock *OMPScanFinish = nullptr;
+ /// If true, it indicates Input phase is lowered; else it indicates
+ /// ScanPhase is lowered
+ bool OMPFirstScanLoop = false;
+ // Maps the private reduction variable to the pointer of the temporary
+ // buffer
+ llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ScanBuffPtrs;
+ llvm::Value *IV;
+ llvm::Value *Span;
+ } ScanInfo;
+
/// Initialize the internal state, this will put structures types and
/// potentially other helpers into the underlying module. Must be called
/// before any other method and only once! This internal state includes types
@@ -750,6 +774,35 @@ class OpenMPIRBuilder {
LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
const Twine &Name = "loop");
+ /// Generator for the control flow structure of OpenMP canonical loops if
+ /// the parent directive has an `inscan` modifier specified.
+ /// If the `inscan` modifier is specified, the region of the parent is
+ /// expected to have a `scan` directive. Based on the clauses in
+ /// scan directive, the body of the loop is split into two loops: Input loop
+ /// and Scan Loop. Input loop contains the code generated for input phase of
+ /// scan and Scan loop contains the code generated for scan phase of scan.
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the loop body code.
+ /// \param Start Value of the loop counter for the first iteration.
+ /// \param Stop Loop counter values past this will stop the loop.
+ /// \param Step Loop counter increment after each iteration; negative
+ /// means counting down.
+ /// \param IsSigned Whether Start, Stop and Step are signed integers.
+ /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+ /// counter.
+ /// \param ComputeIP Insertion point for instructions computing the trip
+ /// count. Can be used to ensure the trip count is available
+ /// at the outermost loop of a loop nest. If not set,
+ /// defaults to the preheader of the generated loop.
+ /// \param Name Base name used to derive BB and instruction names.
+ ///
+ /// \returns A vector containing Loop Info of Input Loop and Scan Loop.
+ Expected<SmallVector<llvm::CanonicalLoopInfo *>> createCanonicalScanLoops(
+ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+ InsertPointTy ComputeIP, const Twine &Name);
+
/// Calculate the trip count of a canonical loop.
///
/// This allows specifying user-defined loop counter values using increment,
@@ -818,13 +871,16 @@ class OpenMPIRBuilder {
/// at the outermost loop of a loop nest. If not set,
/// defaults to the preheader of the generated loop.
/// \param Name Base name used to derive BB and instruction names.
+ /// \param InScan Whether loop has a scan reduction specified.
///
/// \returns An object representing the created control flow structure which
/// can be used for loop-associated directives.
- LLVM_ABI Expected<CanonicalLoopInfo *> createCanonicalLoop(
- const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
- Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
- InsertPointTy ComputeIP = {}, const Twine &Name = "loop");
+ LLVM_ABI Expected<CanonicalLoopInfo *>
+ createCanonicalLoop(const LocationDescription &Loc,
+ LoopBodyGenCallbackTy BodyGenCB, Value *Start,
+ Value *Stop, Value *Step, bool IsSigned,
+ bool InclusiveStop, InsertPointTy ComputeIP = {},
+ const Twine &Name = "loop", bool InScan = false);
/// Collapse a loop nest into a single loop.
///
@@ -1556,6 +1612,45 @@ class OpenMPIRBuilder {
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
Function *ReduceFn, AttributeList FuncAttrs);
+ /// Creates the runtime call specified
+ /// \param Callee Function Declaration Value
+ /// \param Args Arguments passed to the call
+ /// \param Name Optional param to specify the name of the call Instruction.
+ ///
+ /// \return The Runtime call instruction created.
+ llvm::CallInst *emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
+ ArrayRef<llvm::Value *> Args,
+ const llvm::Twine &Name);
+
+ /// Helper function for createCanonicalScanLoops that first emits the input
+ /// loop via \p InputLoopGen and then the scan loop via \p ScanLoopGen.
+ /// \param InputLoopGen Callback for generating the loop for input phase
+ /// \param ScanLoopGen Callback for generating the loop for scan phase
+ ///
+ /// \return error if any produced, else return success.
+ Error emitScanBasedDirectiveIR(
+ llvm::function_ref<Error()> InputLoopGen,
+ llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen);
+
+ /// Creates the basic blocks required for scan reduction.
+ void createScanBBs();
+
+ /// Dynamically allocates the buffer needed for scan reduction.
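+ /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to be
+ /// declared.
+ /// \param ScanVars Scan Variables.
+ /// \param ScanVarsType Element types of the scan variables, in the same
+ /// order as \p ScanVars.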
+ ///
+ /// \return error if any produced, else return success.
+ Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP,
+ ArrayRef<llvm::Value *> ScanVars,
+ ArrayRef<llvm::Type *> ScanVarsType);
+
+ /// Copies the result back to the reduction variable.
+ /// \param ReductionInfos Array type containing the ReductionOps.
+ ///
+ /// \return error if any produced, else return success.
+ Error emitScanBasedDirectiveFinalsIR(
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+
/// This function emits a helper that gathers Reduce lists from the first
/// lane of every active warp to lanes in the first warp.
///
@@ -2639,6 +2734,41 @@ class OpenMPIRBuilder {
FinalizeCallbackTy FiniCB,
Value *Filter);
+ /// This function performs the scan reduction of the values updated in
+ /// the input phase. The reduction logic needs to be emitted between input
+ /// and scan loop returned by `createCanonicalScanLoops`. The following
+ /// is the code that is generated, `buffer` and `span` are expected to be
+ /// populated before executing the generated code.
+ ///
+ /// for (int k = 0; k != ceil(log2(span)); ++k) {
+ /// i=pow(2,k)
+ /// for (size cnt = last_iter; cnt >= i; --cnt)
+ /// buffer[cnt] op= buffer[cnt-i];
+ /// }
+ /// \param Loc The insert and source location description.
+ /// \param ReductionInfos Array type containing the ReductionOps.
+ ///
+ /// \returns The insertion position *after* the scan reduction.
+ InsertPointOrErrorTy emitScanReduction(
+ const LocationDescription &Loc,
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+
+ /// This directive splits and directs the control flow to input phase
+ /// blocks or scan phase blocks based on 1. whether input loop or scan loop
+ /// is executed, 2. whether exclusive or inclusive scan is used.
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param AllocaIP The IP where the temporary buffer for scan reduction
+ /// needs to be allocated.
+ /// \param ScanVars Scan Variables.
+ /// \param ScanVarsType Types of the scan variables, in the same order as
+ /// \p ScanVars.
+ /// \param IsInclusive Whether it is an inclusive or exclusive scan.
+ ///
+ /// \returns The insertion position *after* the scan.
+ InsertPointOrErrorTy createScan(const LocationDescription &Loc,
+ InsertPointTy AllocaIP,
+ ArrayRef<llvm::Value *> ScanVars,
+ ArrayRef<llvm::Type *> ScanVarsType,
+ bool IsInclusive);
/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 840ca8364e218..e0e13c901e143 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -59,6 +59,8 @@
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <cassert>
+#include <cstddef>
#include <cstdint>
#include <optional>
@@ -4021,6 +4023,336 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
/*Conditional*/ true, /*hasFinalize*/ true);
}
+llvm::CallInst *
+OpenMPIRBuilder::emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
+ ArrayRef<llvm::Value *> Args,
+ const llvm::Twine &Name) {
+ llvm::CallInst *Call = Builder.CreateCall(
+ Callee, Args, SmallVector<llvm::OperandBundleDef, 1>(), Name);
+ Call->setDoesNotThrow();
+ return Call;
+}
+
+// Expects the input basic block to be dominated by BeforeScanBB.
+// Once Scan directive is encountered, the code after scan directive should be
+// dominated by AfterScanBB. Scan directive splits the code sequence to
+// scan and input phase. Based on whether inclusive or exclusive
+// clause is used in the scan directive and whether input loop or scan loop
+// is lowered, it adds jumps to input and scan phase. First Scan loop is the
+// input loop and second is the scan loop. The code generated handles only
+// inclusive scans now.
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
+ bool IsInclusive) {
+ if (ScanInfo.OMPFirstScanLoop) {
+ llvm::Error Err =
+ emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars, ScanVarsType);
+ if (Err) {
+ return Err;
+ }
+ }
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ llvm::Value *IV = ScanInfo.IV;
+
+ if (ScanInfo.OMPFirstScanLoop) {
+ // Emit buffer[i] = red; at the end of the input phase.
+ for (size_t i = 0; i < ScanVars.size(); i++) {
+ Value *BuffPtr = ScanInfo.ScanBuffPtrs[ScanVars[i]];
+ Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
+ Type *DestTy = ScanVarsType[i];
+ Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+ Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
+
+ Builder.CreateStore(Src, Val);
+ }
+ }
+ Builder.CreateBr(ScanInfo.OMPScanLoopExit);
+ emitBlock(ScanInfo.OMPScanDispatch, Builder.GetInsertBlock()->getParent());
+
+ if (!ScanInfo.OMPFirstScanLoop) {
+ IV = ScanInfo.IV;
+ // Emit red = buffer[i]; at the entrance to the scan phase.
+ // TODO: if exclusive scan, the red = buffer[i-1] needs to be updated.
+ for (size_t i = 0; i < ScanVars.size(); i++) {
+ Value *BuffPtr = ScanInfo.ScanBuffPtrs[ScanVars[i]];
+ Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
+ Type *DestTy = ScanVarsType[i];
+ Value *SrcPtr =
+ Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+ Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
+ Builder.CreateStore(Src, ScanVars[i]);
+ }
+ }
+
+ // TODO: Update it to CreateBr and remove dead blocks
+ llvm::Value *CmpI = Builder.getInt1(true);
+ if (ScanInfo.OMPFirstScanLoop == IsInclusive) {
+ Builder.CreateCondBr(CmpI, ScanInfo.OMPBeforeScanBlock,
+ ScanInfo.OMPAfterScanBlock);
+ } else {
+ Builder.CreateCondBr(CmpI, ScanInfo.OMPAfterScanBlock,
+ ScanInfo.OMPBeforeScanBlock);
+ }
+ emitBlock(ScanInfo.OMPAfterScanBlock, Builder.GetInsertBlock()->getParent());
+ Builder.SetInsertPoint(ScanInfo.OMPAfterScanBlock);
+ return Builder.saveIP();
+}
+
+Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
+ InsertPointTy AllocaIP, ArrayRef<Value *> ScanVars,
+ ArrayRef<Type *> ScanVarsType) {
+
+ Builder.restoreIP(AllocaIP);
+ // Create the shared pointer at alloca IP.
+ for (size_t i = 0; i < ScanVars.size(); i++) {
+ llvm::Value *BuffPtr =
+ Builder.CreateAlloca(Builder.getPtrTy(), nullptr, "vla");
+ ScanInfo.ScanBuffPtrs[ScanVars[i]] = BuffPtr;
+ }
+
+ // Allocate temporary buffer by master thread
+ auto BodyGenCB = [&](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP) -> Error {
+ Builder.restoreIP(CodeGenIP);
+ Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
+ for (size_t i = 0; i < ScanVars.size(); i++) {
+ Type *IntPtrTy = Builder.getInt32Ty();
+ Constant *Allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]);
+ Allocsize = ConstantExpr::getTruncOrBitCast(Allocsize, IntPtrTy);
+ Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
+ AllocSpan, nullptr, "arr");
+ Builder.CreateStore(Buff, ScanInfo.ScanBuffPtrs[ScanVars[i]]);
+ }
+ return Error::success();
+ };
+ // TODO: Perform finalization actions for variables. This has to be
+ // called for variables which have destructors/finalizers.
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+ Builder.SetInsertPoint(ScanInfo.OMPScanInit->getTerminator());
+ llvm::Value *FilterVal = Builder.getInt32(0);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+ createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ BasicBlock *InputBB = Builder.GetInsertBlock();
+ if (InputBB->getTerminator())
+ Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
+ AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+
+ return Error::success();
+}
+
+Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
+ SmallVector<ReductionInfo> ReductionInfos) {
+ auto BodyGenCB = [&](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP) -> Error {
+ Builder.restoreIP(CodeGenIP);
+ for (ReductionInfo RedInfo : ReductionInfos) {
+ Value *PrivateVar = RedInfo.PrivateVariable;
+ Value *OrigVar = RedInfo.Variable;
+ Value *BuffPtr = ScanInfo.ScanBuffPtrs[PrivateVar];
+ Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
+
+ Type *SrcTy = RedInfo.ElementType;
+ Value *Val =
+ Builder.CreateInBoundsGEP(SrcTy, Buff, ScanInfo.Span, "arrayOffset");
+ Value *Src = Builder.CreateLoad(SrcTy, Val);
+
+ Builder.CreateStore(Src, OrigVar);
+ Builder.CreateFree(Buff);
+ }
+ return Error::success();
+ };
+ // TODO: Perform finalization actions for variables. This has to be
+ // called for variables which have destructors/finalizers.
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+ if (ScanInfo.OMPScanFinish->getTerminator())
+ Builder.SetInsertPoint(ScanInfo.OMPScanFinish->getTerminator());
+ else
+ Builder.SetInsertPoint(ScanInfo.OMPScanFinish);
+
+ llvm::Value *FilterVal = Builder.getInt32(0);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+ createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ BasicBlock *InputBB = Builder.GetInsertBlock();
+ if (InputBB->getTerminator())
+ Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
+ AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ return Error::success();
+}
+
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
+ const LocationDescription &Loc,
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
+
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+ auto BodyGenCB = [&](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP) -> Error {
+ Builder.restoreIP(CodeGenIP);
+ Function *CurFn = Builder.GetInsertBlock()->getParent();
+ // for (int k = 0; k != ceil(log2(n)); ++k)
+ llvm::BasicBlock *LoopBB =
+ BasicBlock::Create(CurFn->getContext(), "omp.outer.log.scan.body");
+ llvm::BasicBlock *ExitBB =
+ splitBB(Builder, false, "omp.outer.log.scan.exit");
+ llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration(
+ Builder.GetInsertBlock()->getModule(),
+ (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
+ llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
+ llvm::Value *Arg =
+ Builder.CreateUIToFP(ScanInfo.Span, Builder.getDoubleTy());
+ llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
+ F = llvm::Intrinsic::getOrInsertDeclaration(
+ Builder.GetInsertBlock()->getModule(),
+ (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
+ LogVal = emitNoUnwindRuntimeCall(F, LogVal, "");
+ LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
+ llvm::Value *NMin1 = Builder.CreateNUWSub(
+ ScanInfo.Span, llvm::ConstantInt::get(ScanInfo.Span->getType(), 1));
+ Builder.SetInsertPoint(InputBB);
+ Builder.CreateBr(LoopBB);
+ emitBlock(LoopBB, CurFn);
+ Builder.SetInsertPoint(LoopBB);
+
+ PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+ //// size pow2k = 1;
+ PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+ Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
+ InputBB);
+ Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
+ InputBB);
+ //// for (size i = n - 1; i >= 2 ^ k; --i)
+ //// tmp[i] op= tmp[i-pow2k];
+ llvm::BasicBlock *InnerLoopBB =
+ BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.body");
+ llvm::BasicBlock *InnerExitBB =
+ BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.exit");
+ llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
+ Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ emitBlock(InnerLoopBB, CurFn);
+ Builder.SetInsertPoint(InnerLoopBB);
+ auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+ IVal->addIncoming(NMin1, LoopBB);
+ for (ReductionInfo RedInfo : ReductionInfos) {
+ Value *ReductionVal = RedInfo.PrivateVariable;
+ Value *BuffPtr = ScanInfo.ScanBuffPtrs[ReductionVal];
+ Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
+ Type *DestTy = RedInfo.ElementType;
+ Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
+ Value *LHSPtr =
+ Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+ Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K);
+ Value *RHSPtr =
+ Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
+ Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
+ Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
+ llvm::Value *Result;
+ InsertPointOrErrorTy AfterIP =
+ RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.CreateStore(Result, LHSPtr);
+ }
+ llvm::Value *NextIVal = Builder.CreateNUWSub(
+ IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
+ IVal->addIncoming(NextIVal, Builder.GetInsertBlock());
+ CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
+ Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ emitBlock(InnerExitBB, CurFn);
+ llvm::Value *Next = Builder.CreateNUWAdd(
+ Counter, llvm::ConstantInt::get(Counter->getType(), 1));
+ Counter->addIncoming(Next, Builder.GetInsertBlock());
+ // pow2k <<= 1;
+ llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+ Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock());
+ llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal);
+ Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+ Builder.SetInsertPoint(ExitBB->getFirstInsertionPt());
+ return Error::success();
+ };
+
+ // TODO: Perform finalization actions for variables. This has to be
+ // called for variables which have destructors/finalizers.
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+ llvm::Value *FilterVal = Builder.getInt32(0);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+ createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos);
+ if (Err) {
+ return Err;
+ }
+
+ return AfterIP;
+}
+
+Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
+ llvm::function_ref<Error()> InputLoopGen,
+ llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen) {
+
+ {
+ // Emit loop with input phase:
+ // for (i: 0..<num_iters>) {
+ // <input phase>;
+ // buffer[i] = red;
+ // }
+ ScanInfo.OMPFirstScanLoop = true;
+ auto Result = InputLoopGen();
+ if (Result)
+ return Result;
+ }
+ {
+ // Emit loop with scan phase:
+ // for (i: 0..<num_iters>) {
+ // red = buffer[i];
+ // <scan phase>;
+ // }
+ ScanInfo.OMPFirstScanLoop = false;
+ auto Result = ScanLoopGen(Builder.saveIP());
+ if (Result)
+ return Result;
+ }
+ return Error::success();
+}
+
+void OpenMPIRBuilder::createScanBBs() {
+ Function *Fun = Builder.GetInsertBlock()->getParent();
+ ScanInfo.OMPScanDispatch =
+ BasicBlock::Create(Fun->getContext(), "omp.inscan.dispatch");
+ ScanInfo.OMPAfterScanBlock =
+ BasicBlock::Create(Fun->getContext(), "omp.after.scan.bb");
+ ScanInfo.OMPBeforeScanBlock =
+ BasicBlock::Create(Fun->getContext(), "omp.before.scan.bb");
+ ScanInfo.OMPScanLoopExit =
+ BasicBlock::Create(Fun->getContext(), "omp.scan.loop.exit");
+}
CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
BasicBlock *PostInsertBefore, const Twine &Name) {
@@ -4118,6 +4450,92 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
return CL;
}
+Expected<SmallVector<llvm::CanonicalLoopInfo *>>
+OpenMPIRBuilder::createCanonicalScanLoops(
+ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+ InsertPointTy ComputeIP, const Twine &Name) {
+ LocationDescription ComputeLoc =
+ ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
+ updateToLocation(ComputeLoc);
+
+ Value *TripCount = calculateCanonicalLoopTripCount(
+ ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
+ ScanInfo.Span = TripCount;
+ ScanInfo.OMPScanInit = splitBB(Builder, true, "scan.init");
+ Builder.SetInsertPoint(ScanInfo.OMPScanInit);
+
+ auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
+ /// The control flow of a loop body with the following structure:
+ ///
+ /// InputBlock
+ /// |
+ /// ContinueBlock
+ ///
+ /// is transformed to:
+ ///
+ /// InputBlock
+ /// |
+ /// OMPScanDispatch
+ ///
+ /// OMPBeforeScanBlock
+ /// |
+ /// OMPScanLoopExit
+ /// |
+ /// ContinueBlock
+ ///
+ /// OMPBeforeScanBlock dominates the control flow of code generated until
+ /// scan directive is encountered and OMPAfterScanBlock dominates the
+ /// control flow of code generated after scan is encountered. The successor
+ /// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
+ /// on 1. whether it is in Input phase or Scan phase, 2. whether it is an
+ /// exclusive or inclusive scan.
+ Builder.restoreIP(CodeGenIP);
+ ScanInfo.IV = IV;
+ createScanBBs();
+ BasicBlock *InputBlock = Builder.GetInsertBlock();
+ Instruction *Terminator = InputBlock->getTerminator();
+ assert(Terminator->getNumSuccessors() == 1);
+ BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
+ Terminator->setSuccessor(0, ScanInfo.OMPScanDispatch);
+ emitBlock(ScanInfo.OMPBeforeScanBlock,
+ Builder.GetInsertBlock()->getParent());
+ Builder.CreateBr(ScanInfo.OMPScanLoopExit);
+ emitBlock(ScanInfo.OMPScanLoopExit, Builder.GetInsertBlock()->getParent());
+ Builder.CreateBr(ContinueBlock);
+ Builder.SetInsertPoint(ScanInfo.OMPBeforeScanBlock->getFirstInsertionPt());
+ return BodyGenCB(Builder.saveIP(), IV);
+ };
+
+ SmallVector<llvm::CanonicalLoopInfo *> Result;
+ const auto &&InputLoopGen = [&]() -> Error {
+ auto LoopInfo =
+ createCanonicalLoop(Builder.saveIP(), BodyGen, Start, Stop, Step,
+ IsSigned, InclusiveStop, ComputeIP, Name, true);
+ if (!LoopInfo)
+ return LoopInfo.takeError();
+ Result.push_back(*LoopInfo);
+ Builder.restoreIP((*LoopInfo)->getAfterIP());
+ return Error::success();
+ };
+ const auto &&ScanLoopGen = [&](LocationDescription Loc) -> Error {
+ auto LoopInfo =
+ createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
+ InclusiveStop, ComputeIP, Name, true);
+ if (!LoopInfo)
+ return LoopInfo.takeError();
+ Result.push_back(*LoopInfo);
+ Builder.restoreIP((*LoopInfo)->getAfterIP());
+ ScanInfo.OMPScanFinish = Builder.GetInsertBlock();
+ return Error::success();
+ };
+ Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen);
+ if (Err) {
+ return Err;
+ }
+ return Result;
+}
+
Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
bool IsSigned, bool InclusiveStop, const Twine &Name) {
@@ -4181,7 +4599,7 @@ Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
- InsertPointTy ComputeIP, const Twine &Name) {
+ InsertPointTy ComputeIP, const Twine &Name, bool InScan) {
LocationDescription ComputeLoc =
ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
@@ -4192,6 +4610,9 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
Builder.restoreIP(CodeGenIP);
Value *Span = Builder.CreateMul(IV, Step);
Value *IndVar = Builder.CreateAdd(Span, Start);
+ if (InScan) {
+ ScanInfo.IV = IndVar;
+ }
return BodyGenCB(Builder.saveIP(), IndVar);
};
LocationDescription LoopLoc =
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index d6b578aa8ffd1..be06d4c10d7dd 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -23,6 +23,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include <cstdlib>
#include <optional>
using namespace llvm;
@@ -5360,6 +5361,100 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
}
+void createScan(llvm::Value *scanVar, llvm::Type *scanType,
+ OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
+ OpenMPIRBuilder::LocationDescription Loc,
+ OpenMPIRBuilder::InsertPointTy &allocaIP) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ ASSERT_EXPECTED_INIT(
+ InsertPointTy, retIp,
+ OMPBuilder.createScan(Loc, allocaIP, {scanVar}, {scanType}, true));
+ Builder.restoreIP(retIp);
+}
+
+TEST_F(OpenMPIRBuilderTest, ScanReduction) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ IRBuilder<> Builder(BB);
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+ Value *TripCount = F->getArg(0);
+ Type *LCTy = TripCount->getType();
+ Value *StartVal = ConstantInt::get(LCTy, 1);
+ Value *StopVal = ConstantInt::get(LCTy, 100);
+ Value *Step = ConstantInt::get(LCTy, 1);
+ auto AllocaIP = Builder.saveIP();
+
+ llvm::Value *ScanVar = Builder.CreateAlloca(Builder.getFloatTy());
+ llvm::Value *OrigVar = Builder.CreateAlloca(Builder.getFloatTy());
+ unsigned NumBodiesGenerated = 0;
+ auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
+ NumBodiesGenerated += 1;
+ Builder.restoreIP(CodeGenIP);
+ createScan(ScanVar, Builder.getFloatTy(), OMPBuilder, Builder, Loc,
+ AllocaIP);
+ return Error::success();
+ };
+ SmallVector<CanonicalLoopInfo *> Loops;
+ ASSERT_EXPECTED_INIT(SmallVector<CanonicalLoopInfo *>, loopsVec,
+ OMPBuilder.createCanonicalScanLoops(
+ Loc, LoopBodyGenCB, StartVal, StopVal, Step, false,
+ false, Builder.saveIP(), "scan"));
+ Loops = loopsVec;
+ EXPECT_EQ(Loops.size(), 2U);
+ CanonicalLoopInfo *InputLoop = Loops.front();
+ CanonicalLoopInfo *ScanLoop = Loops.back();
+ Builder.restoreIP(ScanLoop->getAfterIP());
+ InputLoop->assertOK();
+ ScanLoop->assertOK();
+
+ EXPECT_EQ(ScanLoop->getAfter(), Builder.GetInsertBlock());
+ EXPECT_EQ(NumBodiesGenerated, 2U);
+ SmallVector<OpenMPIRBuilder::ReductionInfo> ReductionInfos = {
+ {Builder.getFloatTy(), OrigVar, ScanVar,
+ /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
+ /*ReductionGenClang=*/nullptr, sumAtomicReduction}};
+ OpenMPIRBuilder::LocationDescription RedLoc({InputLoop->getAfterIP(), DL});
+ llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
+ ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
+ OMPBuilder.emitScanReduction(RedLoc, ReductionInfos));
+ Builder.restoreIP(retIp);
+ Builder.CreateBr(Cont);
+ Builder.SetInsertPoint(Cont);
+ unsigned NumMallocs = 0;
+ unsigned NumFrees = 0;
+ unsigned NumMasked = 0;
+ unsigned NumEndMasked = 0;
+ unsigned NumLog = 0;
+ unsigned NumCeil = 0;
+ for (Instruction &I : instructions(F)) {
+ if (isa<CallInst>(I)) {
+ CallInst *Call = dyn_cast<CallInst>(&I);
+ auto Name = Call->getCalledFunction()->getName();
+ if (Name.equals_insensitive("malloc")) {
+ NumMallocs += 1;
+ } else if (Name.equals_insensitive("free")) {
+ NumFrees += 1;
+ } else if (Name.equals_insensitive("__kmpc_masked")) {
+ NumMasked += 1;
+ } else if (Name.equals_insensitive("__kmpc_end_masked")) {
+ NumEndMasked += 1;
+ } else if (Name.equals_insensitive("llvm.log2.f64")) {
+ NumLog += 1;
+ } else if (Name.equals_insensitive("llvm.ceil.f64")) {
+ NumCeil += 1;
+ }
+ }
+ }
+ EXPECT_EQ(NumBodiesGenerated, 2U);
+ EXPECT_EQ(NumMasked, 3U);
+ EXPECT_EQ(NumEndMasked, 3U);
+ EXPECT_EQ(NumMallocs, 1U);
+ EXPECT_EQ(NumFrees, 1U);
+ EXPECT_EQ(NumLog, 1U);
+ EXPECT_EQ(NumCeil, 1U);
+}
+
TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
>From a8fb6bdf0c7f3f3ca51679adfa96b0432d95c77f Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Mon, 16 Jun 2025 13:27:26 -0500
Subject: [PATCH 2/4] R2: Addressing review comments
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 166 ++++++++++-----
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 193 +++++++++---------
.../Frontend/OpenMPIRBuilderTest.cpp | 81 ++++++--
3 files changed, 274 insertions(+), 166 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index d0243e80b4f21..620258911ed39 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -31,6 +31,7 @@
namespace llvm {
class CanonicalLoopInfo;
+class ScanInfo;
struct TargetRegionEntryInfo;
class OffloadEntriesInfoManager;
class OpenMPIRBuilder;
@@ -512,29 +513,6 @@ class OpenMPIRBuilder {
}
};
- struct ScanInformation {
- /// Dominates the body of the loop before scan directive
- llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
- /// Dominates the body of the loop before scan directive
- llvm::BasicBlock *OMPAfterScanBlock = nullptr;
- /// Controls the flow to before or after scan blocks
- llvm::BasicBlock *OMPScanDispatch = nullptr;
- /// Exit block of loop body
- llvm::BasicBlock *OMPScanLoopExit = nullptr;
- /// Block before loop body where scan initializations are done
- llvm::BasicBlock *OMPScanInit = nullptr;
- /// Block after loop body where scan finalizations are done
- llvm::BasicBlock *OMPScanFinish = nullptr;
- /// If true, it indicates Input phase is lowered; else it indicates
- /// ScanPhase is lowered
- bool OMPFirstScanLoop = false;
- // Maps the private reduction variable to the pointer of the temporary
- // buffer
- llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ScanBuffPtrs;
- llvm::Value *IV;
- llvm::Value *Span;
- } ScanInfo;
-
/// Initialize the internal state, this will put structures types and
/// potentially other helpers into the underlying module. Must be called
/// before any other method and only once! This internal state includes types
@@ -731,6 +709,9 @@ class OpenMPIRBuilder {
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(
const LocationDescription &Loc, omp::Directive CanceledDirective);
+ /// Allocates a ScanInfo owned by this builder and returns a pointer to it.
+ Expected<ScanInfo *> scanInfoInitialize();
+
/// Generator for '#omp parallel'
///
/// \param Loc The insert and source location description.
@@ -781,6 +762,11 @@ class OpenMPIRBuilder {
/// scan directive, the body of the loop is split into two loops: Input loop
/// and Scan Loop. Input loop contains the code generated for input phase of
/// scan and Scan loop contains the code generated for scan phase of scan.
+ /// From the bodyGen callback of these loops, `createScan` would be called
+ /// when a scan directive is encountered from the loop body. `createScan`
+ /// based on whether 1. inclusive or exclusive scan is specified and, 2. input
+ /// loop or scan loop is generated, lowers the body of the for loop
+ /// accordingly.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the loop body code.
@@ -796,12 +782,14 @@ class OpenMPIRBuilder {
/// at the outermost loop of a loop nest. If not set,
/// defaults to the preheader of the generated loop.
/// \param Name Base name used to derive BB and instruction names.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \returns A vector containing Loop Info of Input Loop and Scan Loop.
Expected<SmallVector<llvm::CanonicalLoopInfo *>> createCanonicalScanLoops(
const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
- InsertPointTy ComputeIP, const Twine &Name);
+ InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo);
/// Calculate the trip count of a canonical loop.
///
@@ -872,15 +860,16 @@ class OpenMPIRBuilder {
/// defaults to the preheader of the generated loop.
/// \param Name Base name used to derive BB and instruction names.
/// \param InScan Whether loop has a scan reduction specified.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \returns An object representing the created control flow structure which
/// can be used for loop-associated directives.
- LLVM_ABI Expected<CanonicalLoopInfo *>
- createCanonicalLoop(const LocationDescription &Loc,
- LoopBodyGenCallbackTy BodyGenCB, Value *Start,
- Value *Stop, Value *Step, bool IsSigned,
- bool InclusiveStop, InsertPointTy ComputeIP = {},
- const Twine &Name = "loop", bool InScan = false);
+ LLVM_ABI Expected<CanonicalLoopInfo *> createCanonicalLoop(
+ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+ InsertPointTy ComputeIP = {}, const Twine &Name = "loop",
+ bool InScan = false, ScanInfo *ScanRedInfo = nullptr);
/// Collapse a loop nest into a single loop.
///
@@ -1612,44 +1601,45 @@ class OpenMPIRBuilder {
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
Function *ReduceFn, AttributeList FuncAttrs);
- /// Creates the runtime call specified
- /// \param Callee Function Declaration Value
- /// \param Args Arguments passed to the call
- /// \param Name Optional param to specify the name of the call Instruction.
- ///
- /// \return The Runtime call instruction created.
- llvm::CallInst *emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
- ArrayRef<llvm::Value *> Args,
- const llvm::Twine &Name);
-
/// Helper function for CreateCanonicalScanLoops to create InputLoop
/// in the firstGen and Scan Loop in the SecondGen
/// \param InputLoopGen Callback for generating the loop for input phase
/// \param ScanLoopGen Callback for generating the loop for scan phase
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \return error if any produced, else return success.
Error emitScanBasedDirectiveIR(
llvm::function_ref<Error()> InputLoopGen,
- llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen);
+ llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen,
+ ScanInfo *ScanRedInfo);
/// Creates the basic blocks required for scan reduction.
- void createScanBBs();
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
+ void createScanBBs(ScanInfo *ScanRedInfo);
/// Dynamically allocates the buffer needed for scan reduction.
/// \param AllocaIP The IP where possibly-shared pointer of buffer needs to be
/// declared. \param ScanVars Scan Variables.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \return error if any produced, else return success.
Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP,
ArrayRef<llvm::Value *> ScanVars,
- ArrayRef<llvm::Type *> ScanVarsType);
+ ArrayRef<llvm::Type *> ScanVarsType,
+ ScanInfo *ScanRedInfo);
/// Copies the result back to the reduction variable.
/// \param ReductionInfos Array type containing the ReductionOps.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \return error if any produced, else return success.
Error emitScanBasedDirectiveFinalsIR(
- SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+ ArrayRef<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos,
+ ScanInfo *ScanInfo);
/// This function emits a helper that gathers Reduce lists from the first
/// lane of every active warp to lanes in the first warp.
@@ -2278,6 +2268,7 @@ class OpenMPIRBuilder {
/// Collection of owned canonical loop objects that eventually need to be
/// free'd.
std::forward_list<CanonicalLoopInfo> LoopInfos;
+ std::forward_list<ScanInfo> ScanInfos;
/// Add a new region that will be outlined later.
void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
@@ -2747,11 +2738,14 @@ class OpenMPIRBuilder {
/// }
/// \param Loc The insert and source location description.
/// \param ReductionInfos Array type containing the ReductionOps.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \returns The insertion position *after* the masked.
InsertPointOrErrorTy emitScanReduction(
const LocationDescription &Loc,
- SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
+ ArrayRef<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos,
+ ScanInfo *ScanRedInfo);
/// This directive split and directs the control flow to input phase
/// blocks or scan phase blocks based on 1. whether input loop or scan loop
@@ -2762,13 +2756,15 @@ class OpenMPIRBuilder {
// needs to be allocated.
/// \param ScanVars Scan Variables.
/// \param IsInclusive Whether it is an inclusive or exclusive scan.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \returns The insertion position *after* the scan.
InsertPointOrErrorTy createScan(const LocationDescription &Loc,
InsertPointTy AllocaIP,
ArrayRef<llvm::Value *> ScanVars,
ArrayRef<llvm::Type *> ScanVarsType,
- bool IsInclusive);
+ bool IsInclusive, ScanInfo *ScanRedInfo);
/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
@@ -3904,6 +3900,84 @@ class CanonicalLoopInfo {
LLVM_ABI void invalidate();
};
+/// ScanInfo holds the information to assist in lowering of Scan reduction.
+/// Before lowering, body of the for loop specifying scan reduction is expected
+/// to have the following structure
+/// Loop Body Entry
+/// |
+/// Code before the scan directive
+/// |
+/// Scan Directive
+/// |
+/// Code after the scan directive
+/// |
+/// Loop Body Exit
+/// When `createCanonicalScanLoops` is executed, the bodyGen callback of it
+/// transforms the body to:
+///
+/// Loop Body Entry
+/// |
+/// OMPScanDispatch
+///
+/// OMPBeforeScanBlock
+/// |
+/// OMPScanLoopExit
+/// |
+/// Loop Body Exit
+///
+/// The insert point is updated to the first insert point of OMPBeforeScanBlock.
+/// It dominates the control flow of code generated until
+/// scan directive is encountered and OMPAfterScanBlock dominates the
+/// control flow of code generated after scan is encountered. The successor
+/// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
+/// on 1. whether it is in Input phase or Scan phase, 2. whether it is an
+/// exclusive or inclusive scan. This jump is added when `createScan` is
+/// executed. If input loop is being generated, if it is inclusive scan,
+/// `OMPBeforeScanBlock` succeeds `OMPScanDispatch`, if exclusive,
+/// `OMPAfterScanBlock` succeeds `OMPScanDispatch`; vice versa for scan loop.
+/// At the end of the input loop, temporary buffer is populated and at the
+/// beginning of the scan loop, temporary buffer is read. After scan directive
+/// is encountered, insertion point is updated to `OMPAfterScanBlock` as it is
+/// expected to dominate the code after the scan directive. Both Before and
+/// After scan blocks are succeeded by `OMPScanLoopExit`.
+/// Temporary buffer allocations are done in `ScanLoopInit` block before the
+/// lowering of for-loop. The results are copied back to reduction variable in
+/// `ScanLoopFinish` block.
+
+class ScanInfo {
+public:
+ /// Dominates the body of the loop before scan directive
+ llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+ /// Dominates the body of the loop before scan directive
+ llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+ /// Controls the flow to before or after scan blocks
+ llvm::BasicBlock *OMPScanDispatch = nullptr;
+ /// Exit block of loop body
+ llvm::BasicBlock *OMPScanLoopExit = nullptr;
+ /// Block before loop body where scan initializations are done
+ llvm::BasicBlock *OMPScanInit = nullptr;
+ /// Block after loop body where scan finalizations are done
+ llvm::BasicBlock *OMPScanFinish = nullptr;
+ /// If true, it indicates Input phase is lowered; else it indicates
+ /// ScanPhase is lowered
+ bool OMPFirstScanLoop = false;
+ /// Maps the private reduction variable to the pointer of the temporary
+ /// buffer
+ llvm::SmallDenseMap<llvm::Value *, llvm::Value *> *ScanBuffPtrs;
+ /// Keeps track of value of iteration variable for input/scan loop to be
+ /// used for Scan directive lowering
+ llvm::Value *IV;
+ /// Stores the span of canonical loop being lowered to be used for temporary
+ /// buffer allocation or Finalization.
+ llvm::Value *Span;
+
+ ScanInfo() {
+ ScanBuffPtrs = new llvm::SmallDenseMap<llvm::Value *, llvm::Value *>();
+ }
+
+ ~ScanInfo() { delete (ScanBuffPtrs); }
+};
+
} // end namespace llvm
#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index e0e13c901e143..95805fb28487c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1,3 +1,4 @@
+
//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -4023,10 +4024,10 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
/*Conditional*/ true, /*hasFinalize*/ true);
}
-llvm::CallInst *
-OpenMPIRBuilder::emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
- ArrayRef<llvm::Value *> Args,
- const llvm::Twine &Name) {
+llvm::CallInst *emitNoUnwindRuntimeCall(IRBuilder<> &Builder,
+ llvm::FunctionCallee Callee,
+ ArrayRef<llvm::Value *> Args,
+ const llvm::Twine &Name) {
llvm::CallInst *Call = Builder.CreateCall(
Callee, Args, SmallVector<llvm::OperandBundleDef, 1>(), Name);
Call->setDoesNotThrow();
@@ -4044,23 +4045,22 @@ OpenMPIRBuilder::emitNoUnwindRuntimeCall(llvm::FunctionCallee Callee,
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
const LocationDescription &Loc, InsertPointTy AllocaIP,
ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
- bool IsInclusive) {
- if (ScanInfo.OMPFirstScanLoop) {
- llvm::Error Err =
- emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars, ScanVarsType);
- if (Err) {
+ bool IsInclusive, ScanInfo *ScanRedInfo) {
+ if (ScanRedInfo->OMPFirstScanLoop) {
+ llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
+ ScanVarsType, ScanRedInfo);
+ if (Err)
return Err;
- }
}
if (!updateToLocation(Loc))
return Loc.IP;
- llvm::Value *IV = ScanInfo.IV;
+ llvm::Value *IV = ScanRedInfo->IV;
- if (ScanInfo.OMPFirstScanLoop) {
+ if (ScanRedInfo->OMPFirstScanLoop) {
// Emit buffer[i] = red; at the end of the input phase.
for (size_t i = 0; i < ScanVars.size(); i++) {
- Value *BuffPtr = ScanInfo.ScanBuffPtrs[ScanVars[i]];
+ Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
Type *DestTy = ScanVarsType[i];
Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
@@ -4069,15 +4069,16 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
Builder.CreateStore(Src, Val);
}
}
- Builder.CreateBr(ScanInfo.OMPScanLoopExit);
- emitBlock(ScanInfo.OMPScanDispatch, Builder.GetInsertBlock()->getParent());
+ Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
+ emitBlock(ScanRedInfo->OMPScanDispatch,
+ Builder.GetInsertBlock()->getParent());
- if (!ScanInfo.OMPFirstScanLoop) {
- IV = ScanInfo.IV;
+ if (!ScanRedInfo->OMPFirstScanLoop) {
+ IV = ScanRedInfo->IV;
// Emit red = buffer[i]; at the entrance to the scan phase.
// TODO: if exclusive scan, the red = buffer[i-1] needs to be updated.
for (size_t i = 0; i < ScanVars.size(); i++) {
- Value *BuffPtr = ScanInfo.ScanBuffPtrs[ScanVars[i]];
+ Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
Type *DestTy = ScanVarsType[i];
Value *SrcPtr =
@@ -4089,42 +4090,44 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
// TODO: Update it to CreateBr and remove dead blocks
llvm::Value *CmpI = Builder.getInt1(true);
- if (ScanInfo.OMPFirstScanLoop == IsInclusive) {
- Builder.CreateCondBr(CmpI, ScanInfo.OMPBeforeScanBlock,
- ScanInfo.OMPAfterScanBlock);
+ if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
+ Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
+ ScanRedInfo->OMPAfterScanBlock);
} else {
- Builder.CreateCondBr(CmpI, ScanInfo.OMPAfterScanBlock,
- ScanInfo.OMPBeforeScanBlock);
+ Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
+ ScanRedInfo->OMPBeforeScanBlock);
}
- emitBlock(ScanInfo.OMPAfterScanBlock, Builder.GetInsertBlock()->getParent());
- Builder.SetInsertPoint(ScanInfo.OMPAfterScanBlock);
+ emitBlock(ScanRedInfo->OMPAfterScanBlock,
+ Builder.GetInsertBlock()->getParent());
+ Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
return Builder.saveIP();
}
Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
InsertPointTy AllocaIP, ArrayRef<Value *> ScanVars,
- ArrayRef<Type *> ScanVarsType) {
+ ArrayRef<Type *> ScanVarsType, ScanInfo *ScanRedInfo) {
Builder.restoreIP(AllocaIP);
// Create the shared pointer at alloca IP.
for (size_t i = 0; i < ScanVars.size(); i++) {
llvm::Value *BuffPtr =
Builder.CreateAlloca(Builder.getPtrTy(), nullptr, "vla");
- ScanInfo.ScanBuffPtrs[ScanVars[i]] = BuffPtr;
+ (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
}
// Allocate temporary buffer by master thread
auto BodyGenCB = [&](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) -> Error {
Builder.restoreIP(CodeGenIP);
- Value *AllocSpan = Builder.CreateAdd(ScanInfo.Span, Builder.getInt32(1));
+ Value *AllocSpan =
+ Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
for (size_t i = 0; i < ScanVars.size(); i++) {
Type *IntPtrTy = Builder.getInt32Ty();
Constant *Allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]);
Allocsize = ConstantExpr::getTruncOrBitCast(Allocsize, IntPtrTy);
Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
AllocSpan, nullptr, "arr");
- Builder.CreateStore(Buff, ScanInfo.ScanBuffPtrs[ScanVars[i]]);
+ Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
}
return Error::success();
};
@@ -4132,7 +4135,7 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
// called for variables which have destructors/finalizers.
auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
- Builder.SetInsertPoint(ScanInfo.OMPScanInit->getTerminator());
+ Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
llvm::Value *FilterVal = Builder.getInt32(0);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
@@ -4152,19 +4155,19 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
}
Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
- SmallVector<ReductionInfo> ReductionInfos) {
+ ArrayRef<ReductionInfo> ReductionInfos, ScanInfo *ScanRedInfo) {
auto BodyGenCB = [&](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) -> Error {
Builder.restoreIP(CodeGenIP);
for (ReductionInfo RedInfo : ReductionInfos) {
Value *PrivateVar = RedInfo.PrivateVariable;
Value *OrigVar = RedInfo.Variable;
- Value *BuffPtr = ScanInfo.ScanBuffPtrs[PrivateVar];
+ Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
Type *SrcTy = RedInfo.ElementType;
- Value *Val =
- Builder.CreateInBoundsGEP(SrcTy, Buff, ScanInfo.Span, "arrayOffset");
+ Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
+ "arrayOffset");
Value *Src = Builder.CreateLoad(SrcTy, Val);
Builder.CreateStore(Src, OrigVar);
@@ -4176,10 +4179,10 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
// called for variables which have destructors/finalizers.
auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
- if (ScanInfo.OMPScanFinish->getTerminator())
- Builder.SetInsertPoint(ScanInfo.OMPScanFinish->getTerminator());
+ if (ScanRedInfo->OMPScanFinish->getTerminator())
+ Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
else
- Builder.SetInsertPoint(ScanInfo.OMPScanFinish);
+ Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
llvm::Value *FilterVal = Builder.getInt32(0);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
@@ -4200,7 +4203,8 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
const LocationDescription &Loc,
- SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos) {
+ ArrayRef<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos,
+ ScanInfo *ScanRedInfo) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -4218,15 +4222,16 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
(llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
llvm::Value *Arg =
- Builder.CreateUIToFP(ScanInfo.Span, Builder.getDoubleTy());
- llvm::Value *LogVal = emitNoUnwindRuntimeCall(F, Arg, "");
+ Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
+ llvm::Value *LogVal = emitNoUnwindRuntimeCall(Builder, F, Arg, "");
F = llvm::Intrinsic::getOrInsertDeclaration(
Builder.GetInsertBlock()->getModule(),
(llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
- LogVal = emitNoUnwindRuntimeCall(F, LogVal, "");
+ LogVal = emitNoUnwindRuntimeCall(Builder, F, LogVal, "");
LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
llvm::Value *NMin1 = Builder.CreateNUWSub(
- ScanInfo.Span, llvm::ConstantInt::get(ScanInfo.Span->getType(), 1));
+ ScanRedInfo->Span,
+ llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
Builder.SetInsertPoint(InputBB);
Builder.CreateBr(LoopBB);
emitBlock(LoopBB, CurFn);
@@ -4253,7 +4258,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
IVal->addIncoming(NMin1, LoopBB);
for (ReductionInfo RedInfo : ReductionInfos) {
Value *ReductionVal = RedInfo.PrivateVariable;
- Value *BuffPtr = ScanInfo.ScanBuffPtrs[ReductionVal];
+ Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
Type *DestTy = RedInfo.ElementType;
Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
@@ -4305,17 +4310,17 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
if (!AfterIP)
return AfterIP.takeError();
Builder.restoreIP(*AfterIP);
- Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos);
- if (Err) {
+ Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
+ if (Err)
return Err;
- }
return AfterIP;
}
Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
llvm::function_ref<Error()> InputLoopGen,
- llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen) {
+ llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen,
+ ScanInfo *ScanRedInfo) {
{
// Emit loop with input phase:
@@ -4323,7 +4328,7 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
// <input phase>;
// buffer[i] = red;
// }
- ScanInfo.OMPFirstScanLoop = true;
+ ScanRedInfo->OMPFirstScanLoop = true;
auto Result = InputLoopGen();
if (Result)
return Result;
@@ -4334,7 +4339,7 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
// red = buffer[i];
// <scan phase>;
// }
- ScanInfo.OMPFirstScanLoop = false;
+ ScanRedInfo->OMPFirstScanLoop = false;
auto Result = ScanLoopGen(Builder.saveIP());
if (Result)
return Result;
@@ -4342,15 +4347,15 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
return Error::success();
}
-void OpenMPIRBuilder::createScanBBs() {
+void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
Function *Fun = Builder.GetInsertBlock()->getParent();
- ScanInfo.OMPScanDispatch =
+ ScanRedInfo->OMPScanDispatch =
BasicBlock::Create(Fun->getContext(), "omp.inscan.dispatch");
- ScanInfo.OMPAfterScanBlock =
+ ScanRedInfo->OMPAfterScanBlock =
BasicBlock::Create(Fun->getContext(), "omp.after.scan.bb");
- ScanInfo.OMPBeforeScanBlock =
+ ScanRedInfo->OMPBeforeScanBlock =
BasicBlock::Create(Fun->getContext(), "omp.before.scan.bb");
- ScanInfo.OMPScanLoopExit =
+ ScanRedInfo->OMPScanLoopExit =
BasicBlock::Create(Fun->getContext(), "omp.scan.loop.exit");
}
CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
@@ -4450,68 +4455,53 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
return CL;
}
+Expected<ScanInfo *> OpenMPIRBuilder::scanInfoInitialize() {
+ ScanInfos.emplace_front();
+ ScanInfo *Result = &ScanInfos.front();
+ return Result;
+}
+
Expected<SmallVector<llvm::CanonicalLoopInfo *>>
OpenMPIRBuilder::createCanonicalScanLoops(
const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
- InsertPointTy ComputeIP, const Twine &Name) {
+ InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo) {
LocationDescription ComputeLoc =
ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
updateToLocation(ComputeLoc);
+ SmallVector<CanonicalLoopInfo *> Result;
+
Value *TripCount = calculateCanonicalLoopTripCount(
ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
- ScanInfo.Span = TripCount;
- ScanInfo.OMPScanInit = splitBB(Builder, true, "scan.init");
- Builder.SetInsertPoint(ScanInfo.OMPScanInit);
+ ScanRedInfo->Span = TripCount;
+ ScanRedInfo->OMPScanInit = splitBB(Builder, true, "scan.init");
+ Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
- /// The control of the loopbody of following structure:
- ///
- /// InputBlock
- /// |
- /// ContinueBlock
- ///
- /// is transformed to:
- ///
- /// InputBlock
- /// |
- /// OMPScanDispatch
- ///
- /// OMPBeforeScanBlock
- /// |
- /// OMPScanLoopExit
- /// |
- /// ContinueBlock
- ///
- /// OMPBeforeScanBlock dominates the control flow of code generated until
- /// scan directive is encountered and OMPAfterScanBlock dominates the
- /// control flow of code generated after scan is encountered. The successor
- /// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
- /// on 1.whether it is in Input phase or Scan Phase , 2. whether it is an
- /// exclusive or inclusive scan.
Builder.restoreIP(CodeGenIP);
- ScanInfo.IV = IV;
- createScanBBs();
+ ScanRedInfo->IV = IV;
+ createScanBBs(ScanRedInfo);
BasicBlock *InputBlock = Builder.GetInsertBlock();
Instruction *Terminator = InputBlock->getTerminator();
assert(Terminator->getNumSuccessors() == 1);
BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
- Terminator->setSuccessor(0, ScanInfo.OMPScanDispatch);
- emitBlock(ScanInfo.OMPBeforeScanBlock,
+ Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
+ emitBlock(ScanRedInfo->OMPBeforeScanBlock,
+ Builder.GetInsertBlock()->getParent());
+ Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
+ emitBlock(ScanRedInfo->OMPScanLoopExit,
Builder.GetInsertBlock()->getParent());
- Builder.CreateBr(ScanInfo.OMPScanLoopExit);
- emitBlock(ScanInfo.OMPScanLoopExit, Builder.GetInsertBlock()->getParent());
Builder.CreateBr(ContinueBlock);
- Builder.SetInsertPoint(ScanInfo.OMPBeforeScanBlock->getFirstInsertionPt());
+ Builder.SetInsertPoint(
+ ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
return BodyGenCB(Builder.saveIP(), IV);
};
- SmallVector<llvm::CanonicalLoopInfo *> Result;
const auto &&InputLoopGen = [&]() -> Error {
- auto LoopInfo =
- createCanonicalLoop(Builder.saveIP(), BodyGen, Start, Stop, Step,
- IsSigned, InclusiveStop, ComputeIP, Name, true);
+ auto LoopInfo = createCanonicalLoop(Builder.saveIP(), BodyGen, Start, Stop,
+ Step, IsSigned, InclusiveStop,
+ ComputeIP, Name, true, ScanRedInfo);
if (!LoopInfo)
return LoopInfo.takeError();
Result.push_back(*LoopInfo);
@@ -4521,18 +4511,17 @@ OpenMPIRBuilder::createCanonicalScanLoops(
const auto &&ScanLoopGen = [&](LocationDescription Loc) -> Error {
auto LoopInfo =
createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
- InclusiveStop, ComputeIP, Name, true);
+ InclusiveStop, ComputeIP, Name, true, ScanRedInfo);
if (!LoopInfo)
return LoopInfo.takeError();
Result.push_back(*LoopInfo);
Builder.restoreIP((*LoopInfo)->getAfterIP());
- ScanInfo.OMPScanFinish = Builder.GetInsertBlock();
+ ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
return Error::success();
};
- Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen);
- if (Err) {
+ Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
+ if (Err)
return Err;
- }
return Result;
}
@@ -4599,7 +4588,8 @@ Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
- InsertPointTy ComputeIP, const Twine &Name, bool InScan) {
+ InsertPointTy ComputeIP, const Twine &Name, bool InScan,
+ ScanInfo *ScanRedInfo) {
LocationDescription ComputeLoc =
ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
@@ -4610,9 +4600,8 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
Builder.restoreIP(CodeGenIP);
Value *Span = Builder.CreateMul(IV, Step);
Value *IndVar = Builder.CreateAdd(Span, Start);
- if (InScan) {
- ScanInfo.IV = IndVar;
- }
+ if (InScan)
+ ScanRedInfo->IV = IndVar;
return BodyGenCB(Builder.saveIP(), IndVar);
};
LocationDescription LoopLoc =
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index be06d4c10d7dd..d9b2b4fd5a8ef 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1,3 +1,4 @@
+
//===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -5361,17 +5362,57 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
}
-void createScan(llvm::Value *scanVar, llvm::Type *scanType,
- OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
- OpenMPIRBuilder::LocationDescription Loc,
- OpenMPIRBuilder::InsertPointTy &allocaIP) {
+static void createScan(llvm::Value *scanVar, llvm::Type *scanType,
+ OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
+ OpenMPIRBuilder::LocationDescription Loc,
+ OpenMPIRBuilder::InsertPointTy &allocaIP,
+ ScanInfo *&ScanRedInfo) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
- ASSERT_EXPECTED_INIT(
- InsertPointTy, retIp,
- OMPBuilder.createScan(Loc, allocaIP, {scanVar}, {scanType}, true));
+ ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
+ OMPBuilder.createScan(Loc, allocaIP, {scanVar},
+ {scanType}, true, ScanRedInfo));
Builder.restoreIP(retIp);
}
-
+/*
+ Following is the pseudocode of the code generated by the test case
+ <declare pointer to buffer> ptr
+ size num_iters = 100
+ // temp buffer allocation
+ omp masked {
+ buff = malloc(num_iters*scanvarstype)
+ *ptr = buff
+ }
+ barrier;
+ // input phase loop
+ for (i: 0..<num_iters>) {
+ <input phase>;
+ buffer = *ptr;
+ buffer[i] = red;
+ }
+ // scan reduction
+ omp masked
+ {
+ for (int k = 0; k != ceil(log2(num_iters)); ++k) {
+ i=pow(2,k)
+ for (size cnt = last_iter; cnt >= i; --cnt) {
+ buffer = *ptr;
+ buffer[cnt] op= buffer[cnt-i];
+ }
+ }
+ }
+ barrier;
+ // scan phase loop
+ for (0..<num_iters>) {
+ buffer = *ptr;
+ red = buffer[i] ;
+ <scan phase>;
+ }
+ // temp buffer deletion
+ omp masked {
+ free(*ptr)
+ }
+ barrier;
+*/
TEST_F(OpenMPIRBuilderTest, ScanReduction) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
@@ -5388,22 +5429,25 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
llvm::Value *ScanVar = Builder.CreateAlloca(Builder.getFloatTy());
llvm::Value *OrigVar = Builder.CreateAlloca(Builder.getFloatTy());
unsigned NumBodiesGenerated = 0;
+ ScanInfo *ScanRedInfo;
+ ASSERT_EXPECTED_INIT(ScanInfo *, ScanInformation,
+ OMPBuilder.scanInfoInitialize());
+ ScanRedInfo = ScanInformation;
auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
NumBodiesGenerated += 1;
Builder.restoreIP(CodeGenIP);
createScan(ScanVar, Builder.getFloatTy(), OMPBuilder, Builder, Loc,
- AllocaIP);
+ AllocaIP, ScanRedInfo);
return Error::success();
};
- SmallVector<CanonicalLoopInfo *> Loops;
- ASSERT_EXPECTED_INIT(SmallVector<CanonicalLoopInfo *>, loopsVec,
+ llvm::SmallVector<CanonicalLoopInfo *> loops;
+ ASSERT_EXPECTED_INIT(llvm::SmallVector<CanonicalLoopInfo *>, loopvec,
OMPBuilder.createCanonicalScanLoops(
Loc, LoopBodyGenCB, StartVal, StopVal, Step, false,
- false, Builder.saveIP(), "scan"));
- Loops = loopsVec;
- EXPECT_EQ(Loops.size(), 2U);
- CanonicalLoopInfo *InputLoop = Loops.front();
- CanonicalLoopInfo *ScanLoop = Loops.back();
+ false, Builder.saveIP(), "scan", ScanRedInfo));
+ loops = loopvec;
+ CanonicalLoopInfo *InputLoop = loops.front();
+ CanonicalLoopInfo *ScanLoop = loops.back();
Builder.restoreIP(ScanLoop->getAfterIP());
InputLoop->assertOK();
ScanLoop->assertOK();
@@ -5416,8 +5460,9 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
/*ReductionGenClang=*/nullptr, sumAtomicReduction}};
OpenMPIRBuilder::LocationDescription RedLoc({InputLoop->getAfterIP(), DL});
llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
- ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
- OMPBuilder.emitScanReduction(RedLoc, ReductionInfos));
+ ASSERT_EXPECTED_INIT(
+ InsertPointTy, retIp,
+ OMPBuilder.emitScanReduction(RedLoc, ReductionInfos, ScanRedInfo));
Builder.restoreIP(retIp);
Builder.CreateBr(Cont);
Builder.SetInsertPoint(Cont);
>From e923c6903610d9aa001b1b4e075d0b5b6a7683a4 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Thu, 31 Jul 2025 16:17:12 -0500
Subject: [PATCH 3/4] R3: Addressing review comments
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 15 +++++++--
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 28 ++++++++--------
.../Frontend/OpenMPIRBuilderTest.cpp | 33 +++++++++----------
3 files changed, 42 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 620258911ed39..7423ec0991e69 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3901,8 +3901,9 @@ class CanonicalLoopInfo {
};
/// ScanInfo holds the information to assist in lowering of Scan reduction.
-/// Before lowering, body of the for loop specifying scan reduction is expected
-/// to have the following structure
+/// Before lowering, the body of the for loop specifying scan reduction is
+/// expected to have the following structure
+///
/// Loop Body Entry
/// |
/// Code before the scan directive
@@ -3943,30 +3944,38 @@ class CanonicalLoopInfo {
/// Temporary buffer allocations are done in `ScanLoopInit` block before the
/// lowering of for-loop. The results are copied back to reduction variable in
/// `ScanLoopFinish` block.
-
class ScanInfo {
public:
/// Dominates the body of the loop before scan directive
llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+
/// Dominates the body of the loop before scan directive
llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+
/// Controls the flow to before or after scan blocks
llvm::BasicBlock *OMPScanDispatch = nullptr;
+
/// Exit block of loop body
llvm::BasicBlock *OMPScanLoopExit = nullptr;
+
/// Block before loop body where scan initializations are done
llvm::BasicBlock *OMPScanInit = nullptr;
+
/// Block after loop body where scan finalizations are done
llvm::BasicBlock *OMPScanFinish = nullptr;
+
/// If true, it indicates Input phase is lowered; else it indicates
/// ScanPhase is lowered
bool OMPFirstScanLoop = false;
+
/// Maps the private reduction variable to the pointer of the temporary
/// buffer
llvm::SmallDenseMap<llvm::Value *, llvm::Value *> *ScanBuffPtrs;
+
/// Keeps track of value of iteration variable for input/scan loop to be
/// used for Scan directive lowering
llvm::Value *IV;
+
/// Stores the span of canonical loop being lowered to be used for temporary
/// buffer allocation or Finalization.
llvm::Value *Span;
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 95805fb28487c..7f5aec40d6bce 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4238,14 +4238,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
Builder.SetInsertPoint(LoopBB);
PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
- //// size pow2k = 1;
+ // size pow2k = 1;
PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
InputBB);
Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
InputBB);
- //// for (size i = n - 1; i >= 2 ^ k; --i)
- //// tmp[i] op= tmp[i-pow2k];
+ // for (size i = n - 1; i >= 2 ^ k; --i)
+ // tmp[i] op= tmp[i-pow2k];
llvm::BasicBlock *InnerLoopBB =
BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.body");
llvm::BasicBlock *InnerExitBB =
@@ -4254,7 +4254,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
emitBlock(InnerLoopBB, CurFn);
Builder.SetInsertPoint(InnerLoopBB);
- auto *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+ PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
IVal->addIncoming(NMin1, LoopBB);
for (ReductionInfo RedInfo : ReductionInfos) {
Value *ReductionVal = RedInfo.PrivateVariable;
@@ -4329,9 +4329,9 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
// buffer[i] = red;
// }
ScanRedInfo->OMPFirstScanLoop = true;
- auto Result = InputLoopGen();
- if (Result)
- return Result;
+ Error Err = InputLoopGen();
+ if (Err)
+ return Err;
}
{
// Emit loop with scan phase:
@@ -4340,9 +4340,9 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
// <scan phase>;
// }
ScanRedInfo->OMPFirstScanLoop = false;
- auto Result = ScanLoopGen(Builder.saveIP());
- if (Result)
- return Result;
+ Error Err = ScanLoopGen(Builder.saveIP());
+ if (Err)
+ return Err;
}
return Error::success();
}
@@ -4499,9 +4499,9 @@ OpenMPIRBuilder::createCanonicalScanLoops(
};
const auto &&InputLoopGen = [&]() -> Error {
- auto LoopInfo = createCanonicalLoop(Builder.saveIP(), BodyGen, Start, Stop,
- Step, IsSigned, InclusiveStop,
- ComputeIP, Name, true, ScanRedInfo);
+ Expected<CanonicalLoopInfo *> LoopInfo = createCanonicalLoop(
+ Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
+ ComputeIP, Name, true, ScanRedInfo);
if (!LoopInfo)
return LoopInfo.takeError();
Result.push_back(*LoopInfo);
@@ -4509,7 +4509,7 @@ OpenMPIRBuilder::createCanonicalScanLoops(
return Error::success();
};
const auto &&ScanLoopGen = [&](LocationDescription Loc) -> Error {
- auto LoopInfo =
+ Expected<CanonicalLoopInfo *> LoopInfo =
createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
InclusiveStop, ComputeIP, Name, true, ScanRedInfo);
if (!LoopInfo)
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index d9b2b4fd5a8ef..b7a060bb3563d 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1,4 +1,3 @@
-
//===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -5473,22 +5472,22 @@ TEST_F(OpenMPIRBuilderTest, ScanReduction) {
unsigned NumLog = 0;
unsigned NumCeil = 0;
for (Instruction &I : instructions(F)) {
- if (isa<CallInst>(I)) {
- CallInst *Call = dyn_cast<CallInst>(&I);
- auto Name = Call->getCalledFunction()->getName();
- if (Name.equals_insensitive("malloc")) {
- NumMallocs += 1;
- } else if (Name.equals_insensitive("free")) {
- NumFrees += 1;
- } else if (Name.equals_insensitive("__kmpc_masked")) {
- NumMasked += 1;
- } else if (Name.equals_insensitive("__kmpc_end_masked")) {
- NumEndMasked += 1;
- } else if (Name.equals_insensitive("llvm.log2.f64")) {
- NumLog += 1;
- } else if (Name.equals_insensitive("llvm.ceil.f64")) {
- NumCeil += 1;
- }
+ if (!isa<CallInst>(I))
+ continue;
+ CallInst *Call = dyn_cast<CallInst>(&I);
+ StringRef Name = Call->getCalledFunction()->getName();
+ if (Name.equals_insensitive("malloc")) {
+ NumMallocs += 1;
+ } else if (Name.equals_insensitive("free")) {
+ NumFrees += 1;
+ } else if (Name.equals_insensitive("__kmpc_masked")) {
+ NumMasked += 1;
+ } else if (Name.equals_insensitive("__kmpc_end_masked")) {
+ NumEndMasked += 1;
+ } else if (Name.equals_insensitive("llvm.log2.f64")) {
+ NumLog += 1;
+ } else if (Name.equals_insensitive("llvm.ceil.f64")) {
+ NumCeil += 1;
}
}
EXPECT_EQ(NumBodiesGenerated, 2U);
>From 91b069aeecd93d39ff021565a468ff8fca1812f1 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Thu, 7 Aug 2025 13:59:54 -0500
Subject: [PATCH 4/4] R4: Addressing review comments
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 58 ++++++++++---------
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 11 ++--
2 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 7423ec0991e69..72c2192dc816a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -512,7 +512,6 @@ class OpenMPIRBuilder {
return allocaInst;
}
};
-
/// Initialize the internal state, this will put structures types and
/// potentially other helpers into the underlying module. Must be called
/// before any other method and only once! This internal state includes types
@@ -782,8 +781,8 @@ class OpenMPIRBuilder {
/// at the outermost loop of a loop nest. If not set,
/// defaults to the preheader of the generated loop.
/// \param Name Base name used to derive BB and instruction names.
- /// \param ScanRedInfo Pointer to the ScanInfo objected created using
- /// `ScanInfoInitialize`.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \returns A vector containing Loop Info of Input Loop and Scan Loop.
Expected<SmallVector<llvm::CanonicalLoopInfo *>> createCanonicalScanLoops(
@@ -861,7 +860,7 @@ class OpenMPIRBuilder {
/// \param Name Base name used to derive BB and instruction names.
/// \param InScan Whether loop has a scan reduction specified.
/// \param ScanRedInfo Pointer to the ScanInfo objected created using
- /// `ScanInfoInitialize`.
+ /// `scanInfoInitialize`.
///
/// \returns An object representing the created control flow structure which
/// can be used for loop-associated directives.
@@ -1604,9 +1603,9 @@ class OpenMPIRBuilder {
/// Helper function for CreateCanonicalScanLoops to create InputLoop
/// in the firstGen and Scan Loop in the SecondGen
/// \param InputLoopGen Callback for generating the loop for input phase
- /// \param ScanLoopGen Callback for generating the loop for scan phase
+ /// \param ScanLoopGen Callback for generating the loop for scan phase
/// \param ScanRedInfo Pointer to the ScanInfo objected created using
- /// `ScanInfoInitialize`.
+ /// `scanInfoInitialize`.
///
/// \return error if any produced, else return success.
Error emitScanBasedDirectiveIR(
@@ -1615,15 +1614,16 @@ class OpenMPIRBuilder {
ScanInfo *ScanRedInfo);
/// Creates the basic blocks required for scan reduction.
- /// \param ScanRedInfo Pointer to the ScanInfo objected created using
- /// `ScanInfoInitialize`.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
void createScanBBs(ScanInfo *ScanRedInfo);
/// Dynamically allocates the buffer needed for scan reduction.
- /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to be
- /// declared. \param ScanVars Scan Variables.
- /// \param ScanRedInfo Pointer to the ScanInfo objected created using
- /// `ScanInfoInitialize`.
+ /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to
+ /// be declared.
+ /// \param ScanVars Scan Variables.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \return error if any produced, else return success.
Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP,
@@ -1633,8 +1633,8 @@ class OpenMPIRBuilder {
/// Copies the result back to the reduction variable.
/// \param ReductionInfos Array type containing the ReductionOps.
- /// \param ScanRedInfo Pointer to the ScanInfo objected created using
- /// `ScanInfoInitialize`.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \return error if any produced, else return success.
Error emitScanBasedDirectiveFinalsIR(
@@ -2268,6 +2268,8 @@ class OpenMPIRBuilder {
/// Collection of owned canonical loop objects that eventually need to be
/// free'd.
std::forward_list<CanonicalLoopInfo> LoopInfos;
+
+ /// Collection of owned ScanInfo objects that eventually need to be free'd.
std::forward_list<ScanInfo> ScanInfos;
/// Add a new region that will be outlined later.
@@ -2730,16 +2732,17 @@ class OpenMPIRBuilder {
/// and scan loop returned by `CreateCanonicalScanLoops`. The following
/// is the code that is generated, `buffer` and `span` are expected to be
/// populated before executing the generated code.
- ///
- /// for (int k = 0; k != ceil(log2(span)); ++k) {
- /// i=pow(2,k)
- /// for (size cnt = last_iter; cnt >= i; --cnt)
- /// buffer[cnt] op= buffer[cnt-i];
- /// }
+ /// \code{.c}
+ /// for (int k = 0; k != ceil(log2(span)); ++k) {
+ /// i=pow(2,k)
+ /// for (size cnt = last_iter; cnt >= i; --cnt)
+ /// buffer[cnt] op= buffer[cnt-i];
+ /// }
+ /// \endcode
/// \param Loc The insert and source location description.
/// \param ReductionInfos Array type containing the ReductionOps.
- /// \param ScanRedInfo Pointer to the ScanInfo objected created using
- /// `ScanInfoInitialize`.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \returns The insertion position *after* the masked.
InsertPointOrErrorTy emitScanReduction(
@@ -2752,12 +2755,12 @@ class OpenMPIRBuilder {
/// is executed, 2. whether exclusive or inclusive scan is used.
///
/// \param Loc The insert and source location description.
- /// \param AllocaIP The IP where the temporary buffer for scan reduction
- // needs to be allocated.
- /// \param ScanVars Scan Variables.
+ /// \param AllocaIP The IP where the temporary buffer for scan reduction
+ /// needs to be allocated.
+ /// \param ScanVars Scan Variables.
/// \param IsInclusive Whether it is an inclusive or exclusive scan.
- /// \param ScanRedInfo Pointer to the ScanInfo objected created using
- /// `ScanInfoInitialize`.
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using
+ /// `scanInfoInitialize`.
///
/// \returns The insertion position *after* the scan.
InsertPointOrErrorTy createScan(const LocationDescription &Loc,
@@ -2765,6 +2768,7 @@ class OpenMPIRBuilder {
ArrayRef<llvm::Value *> ScanVars,
ArrayRef<llvm::Type *> ScanVarsType,
bool IsInclusive, ScanInfo *ScanRedInfo);
+
/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7f5aec40d6bce..9f90411b54d00 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1,4 +1,3 @@
-
//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -60,8 +59,6 @@
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <optional>
@@ -4024,10 +4021,10 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
/*Conditional*/ true, /*hasFinalize*/ true);
}
-llvm::CallInst *emitNoUnwindRuntimeCall(IRBuilder<> &Builder,
- llvm::FunctionCallee Callee,
- ArrayRef<llvm::Value *> Args,
- const llvm::Twine &Name) {
+static llvm::CallInst *emitNoUnwindRuntimeCall(IRBuilder<> &Builder,
+ llvm::FunctionCallee Callee,
+ ArrayRef<llvm::Value *> Args,
+ const llvm::Twine &Name) {
llvm::CallInst *Call = Builder.CreateCall(
Callee, Args, SmallVector<llvm::OperandBundleDef, 1>(), Name);
Call->setDoesNotThrow();
More information about the llvm-commits
mailing list