[polly] r219003 - [Refactor] Generalize parallel code generation
Johannes Doerfert
doerfert at cs.uni-saarland.de
Fri Oct 3 12:10:13 PDT 2014
Author: jdoerfert
Date: Fri Oct 3 14:10:13 2014
New Revision: 219003
URL: http://llvm.org/viewvc/llvm-project?rev=219003&view=rev
Log:
[Refactor] Generalize parallel code generation
+ Generalized function names and comments
+ Removed OpenMP (omp) from the names and comments
+ Use common names (non OpenMP specific) for runtime library call creation
methods
+ Commented the parallel code generator and all its member functions
+ Refactored some values and methods
Differential Revision: http://reviews.llvm.org/D4990
Modified:
polly/trunk/include/polly/CodeGen/LoopGenerators.h
polly/trunk/lib/CodeGen/CodeGeneration.cpp
polly/trunk/lib/CodeGen/LoopGenerators.cpp
polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll
polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll
polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll
polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll
polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll
polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll
polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll
polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll
Modified: polly/trunk/include/polly/CodeGen/LoopGenerators.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/LoopGenerators.h?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/LoopGenerators.h (original)
+++ polly/trunk/include/polly/CodeGen/LoopGenerators.h Fri Oct 3 14:10:13 2014
@@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
#ifndef POLLY_LOOP_GENERATORS_H
#define POLLY_LOOP_GENERATORS_H
+
#include "polly/CodeGen/IRBuilder.h"
-#include "llvm/ADT/SetVector.h"
-#include <map>
+#include "llvm/IR/ValueMap.h"
+#include "llvm/ADT/SetVector.h"
namespace llvm {
class Value;
@@ -54,73 +55,160 @@ Value *createLoop(Value *LowerBound, Val
ScopAnnotator *Annotator = NULL, bool Parallel = false,
bool UseGuard = true);
-class OMPGenerator {
+/// @brief The ParallelLoopGenerator allows creating parallelized loops
+///
+/// To parallelize a loop, we perform the following steps:
+/// o Generate a subfunction which will hold the loop body.
+/// o Create a struct to hold all outer values needed in the loop body.
+/// o Create calls to a runtime library to achieve the actual parallelism.
+/// These calls will spawn and join threads, define how the work (here the
+/// iterations) is distributed between them and make sure each has access
+/// to the struct holding all needed values.
+///
+/// At the moment we support only one parallel runtime, OpenMP.
+///
+/// If we parallelize the outer loop of the following loop nest,
+///
+/// S0;
+/// for (int i = 0; i < N; i++)
+/// for (int j = 0; j < M; j++)
+/// S1(i, j);
+/// S2;
+///
+/// we will generate the following code (with different runtime function names):
+///
+/// S0;
+/// auto *values = storeValuesIntoStruct();
+/// // Execute subfunction with multiple threads
+/// spawn_threads(subfunction, values);
+/// join_threads();
+/// S2;
+///
+/// // This function is executed in parallel by different threads
+/// void subfunction(values) {
+/// while (auto *WorkItem = getWorkItem()) {
+/// int LB = WorkItem.begin();
+/// int UB = WorkItem.end();
+/// for (int i = LB; i < UB; i++)
+/// for (int j = 0; j < M; j++)
+/// S1(i, j);
+/// }
+/// cleanup_thread();
+/// }
+class ParallelLoopGenerator {
public:
- typedef std::map<Value *, Value *> ValueToValueMapTy;
-
- OMPGenerator(PollyIRBuilder &Builder, Pass *P) : Builder(Builder), P(P) {}
+ using ValueToValueMapTy = llvm::ValueMap<Value *, Value *>;
- /// @brief Create an OpenMP parallel loop.
- ///
- ///
- /// @param LowerBound The starting value of the induction variable.
- /// @param UpperBound The upper bound of the induction variable.
- /// @param Stride The value by which the induction variable is
- /// incremented.
- ///
- /// @param UsedValues A set of LLVM-IR Values that should be available to
- /// the new loop body.
- /// @param VMap This map is filled by createParallelLoop(). It
- /// maps the values in UsedValues to Values through which
- /// their content is available within the loop body.
- /// @param LoopBody A pointer to an iterator that is set to point to the
- /// body of the created loop. It should be used to insert
- /// instructions that form the actual loop body.
- ///
- /// @return Value* The newly created induction variable for this loop.
- Value *createParallelLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
- SetVector<Value *> &UsedValues,
- ValueToValueMapTy &VMap,
+ /// @brief Create a parallel loop generator for the current function.
+ ParallelLoopGenerator(PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
+ DominatorTree &DT, const DataLayout &DL)
+ : Builder(Builder), P(P), LI(LI), DT(DT), DL(DL),
+ LongType(
+ Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
+ M(Builder.GetInsertBlock()->getParent()->getParent()) {}
+
+ /// @brief Create a parallel loop
+ ///
+ ///
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ /// @param Values A set of LLVM-IR Values that should be available in
+ /// the new loop body.
+ /// @param VMap A map to allow outside access to the new versions of
+ /// the values in @p Values.
+ /// @param LoopBody A pointer to an iterator that is set to point to the
+ /// body of the created loop. It should be used to insert
+ /// instructions that form the actual loop body.
+ ///
+ /// @return The newly created induction variable for this loop.
+ Value *createParallelLoop(Value *LB, Value *UB, Value *Stride,
+ SetVector<Value *> &Values, ValueToValueMapTy &VMap,
BasicBlock::iterator *LoopBody);
private:
+ /// @brief The IR builder we use to create instructions.
PollyIRBuilder &Builder;
+
+ /// @brief A pass pointer to update analysis information.
Pass *P;
- IntegerType *getIntPtrTy();
- Module *getModule();
+ /// @brief The loop info of the current function we need to update.
+ LoopInfo &LI;
+
+ /// @brief The dominance tree of the current function we need to update.
+ DominatorTree &DT;
+
+ /// @brief The target layout to get the right size for types.
+ const DataLayout &DL;
+
+ /// @brief The type of a "long" on this hardware used for backend calls.
+ Type *LongType;
- void createCallParallelLoopStart(Value *SubFunction, Value *SubfunctionParam,
- Value *NumberOfThreads, Value *LowerBound,
- Value *UpperBound, Value *Stride);
- Value *createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr);
- void createCallParallelEnd();
- void createCallLoopEndNowait();
-
- Value *loadValuesIntoStruct(SetVector<Value *> &Values);
- void extractValuesFromStruct(SetVector<Value *> OldValues, Value *Struct,
- ValueToValueMapTy &Map);
-
- /// @brief Create the OpenMP subfunction.
- ///
- /// @param Stride The value by which the induction variable is
- /// incremented.
- /// @param Struct The structure that is used to make Values available to
- /// the loop body.
- /// @param UsedValues A set of LLVM-IR Values that should be available to
- /// the new loop body.
- /// @param VMap This map that is filled by createSubfunction(). It
- /// maps the values in UsedValues to Values through which
- /// their content is available within the loop body.
- /// @param SubFunction The newly created SubFunction is returned here.
- ///
- /// @return Value* The newly created induction variable.
- Value *createSubfunction(Value *Stride, Value *Struct,
- SetVector<Value *> UsedValues,
- ValueToValueMapTy &VMap, Function **SubFunction);
+ /// @brief The current module
+ Module *M;
- /// @brief Create the definition of the OpenMP subfunction.
- Function *createSubfunctionDefinition();
+ /// @brief Create a runtime library call to spawn the worker threads.
+ ///
+ /// @param SubFn The subfunction which holds the loop body.
+ /// @param SubFnParam The parameter for the subfunction (basically the struct
+ /// filled with the outside values).
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride);
+
+ /// @brief Create a runtime library call to join the worker threads.
+ void createCallJoinThreads();
+
+ /// @brief Create a runtime library call to get the next work item.
+ ///
+ /// @param LBPtr A pointer value to store the work item begin in.
+ /// @param UBPtr A pointer value to store the work item end in.
+ ///
+ /// @returns A true value if the work item is not empty.
+ Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr);
+
+ /// @brief Create a runtime library call to allow cleanup of the thread.
+ ///
+ /// @note This function is called right before the thread will exit the
+ /// subfunction and only if the runtime system depends on it.
+ void createCallCleanupThread();
+
+ /// @brief Create a struct for all @p Values and store them in there.
+ ///
+ /// @param Values The values which should be stored in the struct.
+ ///
+ /// @return The created struct.
+ Value *storeValuesIntoStruct(SetVector<Value *> &Values);
+
+ /// @brief Extract all values from the @p Struct and construct the mapping.
+ ///
+ /// @param Values The values which were stored in the struct.
+ /// @param Struct The struct holding all the values in @p Values.
+ /// @param VMap A map to associate every element of @p Values with the
+ /// new llvm value loaded from the @p Struct.
+ void extractValuesFromStruct(SetVector<Value *> Values, Value *Struct,
+ ValueToValueMapTy &VMap);
+
+ /// @brief Create the definition of the parallel subfunction.
+ Function *createSubFnDefinition();
+
+ /// @brief Create the parallel subfunction.
+ ///
+ /// @param Stride The induction variable increment.
+ /// @param Struct A struct holding all values in @p Values.
+ /// @param Values A set of LLVM-IR Values that should be available in
+ /// the new loop body.
+ /// @param VMap A map to allow outside access to the new versions of
+ /// the values in @p Values.
+ /// @param SubFn The newly created subfunction is returned here.
+ ///
+ /// @return The newly created induction variable.
+ Value *createSubFn(Value *Stride, Value *Struct,
+ SetVector<Value *> UsedValues, ValueToValueMapTy &VMap,
+ Function **SubFn);
};
} // end namespace polly
#endif
Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Fri Oct 3 14:10:13 2014
@@ -246,6 +246,7 @@ private:
LoopInfo &LI;
ScalarEvolution &SE;
DominatorTree &DT;
+ const DataLayout &DL;
// The Builder specifies the current location to code generate at.
PollyIRBuilder &Builder;
@@ -316,7 +317,7 @@ private:
/// @brief Update ClastVars and ValueMap according to a value map.
///
/// @param VMap A map from old to new values.
- void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap);
+ void updateWithValueMap(ParallelLoopGenerator::ValueToValueMapTy &VMap);
/// @brief Create an OpenMP parallel for loop.
///
@@ -579,8 +580,8 @@ SetVector<Value *> ClastStmtCodeGen::get
return Values;
}
-void
-ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap) {
+void ClastStmtCodeGen::updateWithValueMap(
+ ParallelLoopGenerator::ValueToValueMapTy &VMap) {
std::set<Value *> Inserted;
for (const auto &I : ClastVars) {
@@ -611,8 +612,8 @@ void ClastStmtCodeGen::codegenForOpenMP(
BasicBlock::iterator LoopBody;
IntegerType *IntPtrTy = getIntPtrTy();
SetVector<Value *> Values;
- OMPGenerator::ValueToValueMapTy VMap;
- OMPGenerator OMPGen(Builder, P);
+ ParallelLoopGenerator::ValueToValueMapTy VMap;
+ ParallelLoopGenerator OMPGen(Builder, P, LI, DT, DL);
Stride = Builder.getInt(APInt_from_MPZ(For->stride));
Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy);
@@ -1025,7 +1026,8 @@ void ClastStmtCodeGen::codegen(const cla
ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, PollyIRBuilder &B, Pass *P)
: S(scop), P(P), LI(P->getAnalysis<LoopInfo>()),
SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()), Builder(B),
+ DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
+ DL(P->getAnalysis<DataLayoutPass>().getDataLayout()), Builder(B),
ExpGen(Builder, ClastVars) {}
namespace {
@@ -1074,9 +1076,11 @@ public:
AU.addRequired<ScopDetection>();
AU.addRequired<ScopInfo>();
AU.addRequired<DataLayoutPass>();
+ AU.addRequired<DataLayoutPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<CloogInfo>();
+ AU.addPreserved<DataLayoutPass>();
AU.addPreserved<Dependences>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -1103,6 +1107,7 @@ INITIALIZE_PASS_DEPENDENCY(CloogInfo);
INITIALIZE_PASS_DEPENDENCY(Dependences);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
+INITIALIZE_PASS_DEPENDENCY(DataLayoutPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution);
INITIALIZE_PASS_DEPENDENCY(ScopDetection);
INITIALIZE_PASS_DEPENDENCY(DataLayoutPass);
Modified: polly/trunk/lib/CodeGen/LoopGenerators.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/LoopGenerators.cpp?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/LoopGenerators.cpp (original)
+++ polly/trunk/lib/CodeGen/LoopGenerators.cpp Fri Oct 3 14:10:13 2014
@@ -7,8 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains functions to create scalar and OpenMP parallel loops
-// as LLVM-IR.
+// This file contains functions to create scalar and parallel loops as LLVM-IR.
//
//===----------------------------------------------------------------------===//
@@ -138,61 +137,84 @@ Value *polly::createLoop(Value *LB, Valu
return IV;
}
-void OMPGenerator::createCallParallelLoopStart(
- Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads,
- Value *LowerBound, Value *UpperBound, Value *Stride) {
- Module *M = getModule();
- const char *Name = "GOMP_parallel_loop_runtime_start";
+Value *ParallelLoopGenerator::createParallelLoop(
+ Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
+ ValueToValueMapTy &Map, BasicBlock::iterator *LoopBody) {
+ Value *Struct, *IV, *SubFnParam;
+ Function *SubFn;
+
+ Struct = storeValuesIntoStruct(UsedValues);
+
+ BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
+ IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
+ *LoopBody = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(BeforeLoop);
+
+ SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
+ "polly.par.userContext");
+
+ // Add one as the upper bound provided by openmp is a < comparison
+ // whereas the codegenForSequential function creates a <= comparison.
+ UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
+
+ // Tell the runtime we start a parallel loop
+ createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
+ Builder.CreateCall(SubFn, SubFnParam);
+ createCallJoinThreads();
+
+ return IV;
+}
+
+void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
+ Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride) {
+ const std::string Name = "GOMP_parallel_loop_runtime_start";
+
Function *F = M->getFunction(Name);
// If F is not available, declare it.
if (!F) {
- Type *LongTy = getIntPtrTy();
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
Type *Params[] = {PointerType::getUnqual(FunctionType::get(
Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
- Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy,
- LongTy, LongTy};
+ Builder.getInt8PtrTy(), LongType, LongType, LongType,
+ LongType};
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
F = Function::Create(Ty, Linkage, Name, M);
}
- Value *Args[] = {SubFunction, SubfunctionParam, NumberOfThreads,
- LowerBound, UpperBound, Stride};
+ Value *NumberOfThreads = ConstantInt::get(LongType, 0);
+ Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};
Builder.CreateCall(F, Args);
}
-Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
- Value *UpperBoundPtr) {
- Module *M = getModule();
- const char *Name = "GOMP_loop_runtime_next";
+Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
+ Value *UBPtr) {
+ const std::string Name = "GOMP_loop_runtime_next";
+
Function *F = M->getFunction(Name);
// If F is not available, declare it.
if (!F) {
- Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-
- Type *Params[] = {LongPtrTy, LongPtrTy};
-
+ Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
F = Function::Create(Ty, Linkage, Name, M);
}
- Value *Args[] = {LowerBoundPtr, UpperBoundPtr};
-
+ Value *Args[] = {LBPtr, UBPtr};
Value *Return = Builder.CreateCall(F, Args);
Return = Builder.CreateICmpNE(
Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
return Return;
}
-void OMPGenerator::createCallParallelEnd() {
- const char *Name = "GOMP_parallel_end";
- Module *M = getModule();
+void ParallelLoopGenerator::createCallJoinThreads() {
+ const std::string Name = "GOMP_parallel_end";
+
Function *F = M->getFunction(Name);
// If F is not available, declare it.
@@ -206,9 +228,9 @@ void OMPGenerator::createCallParallelEnd
Builder.CreateCall(F);
}
-void OMPGenerator::createCallLoopEndNowait() {
- const char *Name = "GOMP_loop_end_nowait";
- Module *M = getModule();
+void ParallelLoopGenerator::createCallCleanupThread() {
+ const std::string Name = "GOMP_loop_end_nowait";
+
Function *F = M->getFunction(Name);
// If F is not available, declare it.
@@ -222,39 +244,32 @@ void OMPGenerator::createCallLoopEndNowa
Builder.CreateCall(F);
}
-IntegerType *OMPGenerator::getIntPtrTy() {
- return P->getAnalysis<DataLayoutPass>().getDataLayout().getIntPtrType(
- Builder.getContext());
-}
-
-Module *OMPGenerator::getModule() {
- return Builder.GetInsertBlock()->getParent()->getParent();
-}
-
-Function *OMPGenerator::createSubfunctionDefinition() {
- Module *M = getModule();
+Function *ParallelLoopGenerator::createSubFnDefinition() {
Function *F = Builder.GetInsertBlock()->getParent();
std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
- Function *FN = Function::Create(FT, Function::InternalLinkage,
- F->getName() + ".omp_subfn", M);
+ Function *SubFn = Function::Create(FT, Function::InternalLinkage,
+ F->getName() + ".polly.subfn", M);
+
// Do not run any polly pass on the new function.
- FN->addFnAttr(PollySkipFnAttr);
+ SubFn->addFnAttr(PollySkipFnAttr);
- Function::arg_iterator AI = FN->arg_begin();
- AI->setName("omp.userContext");
+ Function::arg_iterator AI = SubFn->arg_begin();
+ AI->setName("polly.par.userContext");
- return FN;
+ return SubFn;
}
-Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) {
- std::vector<Type *> Members;
+Value *
+ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
+ SmallVector<Type *, 8> Members;
for (Value *V : Values)
Members.push_back(V->getType());
StructType *Ty = StructType::get(Builder.getContext(), Members);
- Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext");
+ Value *Struct =
+ new AllocaInst(Ty, 0, "polly.par.userContext", Builder.GetInsertPoint());
for (unsigned i = 0; i < Values.size(); i++) {
Value *Address = Builder.CreateStructGEP(Struct, i);
@@ -264,121 +279,79 @@ Value *OMPGenerator::loadValuesIntoStruc
return Struct;
}
-void OMPGenerator::extractValuesFromStruct(SetVector<Value *> OldValues,
- Value *Struct,
- ValueToValueMapTy &Map) {
+void ParallelLoopGenerator::extractValuesFromStruct(
+ SetVector<Value *> OldValues, Value *Struct, ValueToValueMapTy &Map) {
for (unsigned i = 0; i < OldValues.size(); i++) {
Value *Address = Builder.CreateStructGEP(Struct, i);
Value *NewValue = Builder.CreateLoad(Address);
- Map.insert(std::make_pair(OldValues[i], NewValue));
+ Map[OldValues[i]] = NewValue;
}
}
-Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
- SetVector<Value *> Data,
- ValueToValueMapTy &Map,
- Function **SubFunction) {
- Function *FN = createSubfunctionDefinition();
-
- BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
- *AfterBB;
- Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule,
- *LowerBound, *UpperBound, *IV;
- Type *IntPtrTy = getIntPtrTy();
- LLVMContext &Context = FN->getContext();
+Value *ParallelLoopGenerator::createSubFn(Value *Stride, Value *StructData,
+ SetVector<Value *> Data,
+ ValueToValueMapTy &Map,
+ Function **SubFnPtr) {
+ BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
+ Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
+ Function *SubFn = createSubFnDefinition();
+ LLVMContext &Context = SubFn->getContext();
// Store the previous basic block.
PrevBB = Builder.GetInsertBlock();
// Create basic blocks.
- HeaderBB = BasicBlock::Create(Context, "omp.setup", FN);
- ExitBB = BasicBlock::Create(Context, "omp.exit", FN);
- CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN);
- LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN);
+ HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
+ ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
+ CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
+ PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
- DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DT.addNewBlock(HeaderBB, PrevBB);
DT.addNewBlock(ExitBB, HeaderBB);
DT.addNewBlock(CheckNextBB, HeaderBB);
- DT.addNewBlock(LoadIVBoundsBB, HeaderBB);
+ DT.addNewBlock(PreHeaderBB, HeaderBB);
// Fill up basic block HeaderBB.
Builder.SetInsertPoint(HeaderBB);
- LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr");
- UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr");
- UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(),
- "omp.userContext");
+ LBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.LBPtr");
+ UBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.UBPtr");
+ UserContext = Builder.CreateBitCast(SubFn->arg_begin(), StructData->getType(),
+ "polly.par.userContext");
extractValuesFromStruct(Data, UserContext, Map);
Builder.CreateBr(CheckNextBB);
// Add code to check if another set of iterations will be executed.
Builder.SetInsertPoint(CheckNextBB);
- Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr);
+ Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
- "omp.hasNextScheduleBlock");
- Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB);
+ "polly.par.hasNextScheduleBlock");
+ Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
// Add code to to load the iv bounds for this set of iterations.
- Builder.SetInsertPoint(LoadIVBoundsBB);
- LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound");
- UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound");
+ Builder.SetInsertPoint(PreHeaderBB);
+ LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
+ UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
// Subtract one as the upper bound provided by openmp is a < comparison
// whereas the codegenForSequential function creates a <= comparison.
- UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1),
- "omp.upperBoundAdjusted");
+ UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
+ "polly.par.UBAdjusted");
Builder.CreateBr(CheckNextBB);
Builder.SetInsertPoint(--Builder.GetInsertPoint());
- LoopInfo &LI = P->getAnalysis<LoopInfo>();
- IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, LI, DT, AfterBB,
+ IV = createLoop(LB, UB, Stride, Builder, P, LI, DT, AfterBB,
ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
- Builder.SetInsertPoint(AfterBB->begin());
- // Add code to terminate this openmp subfunction.
+ // Add code to terminate this subfunction.
Builder.SetInsertPoint(ExitBB);
- createCallLoopEndNowait();
+ createCallCleanupThread();
Builder.CreateRetVoid();
Builder.SetInsertPoint(LoopBody);
- *SubFunction = FN;
-
- return IV;
-}
-
-Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
- Value *Stride,
- SetVector<Value *> &Values,
- ValueToValueMapTy &Map,
- BasicBlock::iterator *LoopBody) {
- Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
- Function *SubFunction;
-
- Struct = loadValuesIntoStruct(Values);
-
- BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
- IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction);
- *LoopBody = Builder.GetInsertPoint();
- Builder.SetInsertPoint(PrevInsertPoint);
-
- // Create call for GOMP_parallel_loop_runtime_start.
- SubfunctionParam =
- Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data");
-
- NumberOfThreads = Builder.getInt32(0);
-
- // Add one as the upper bound provided by openmp is a < comparison
- // whereas the codegenForSequential function creates a <= comparison.
- UpperBound =
- Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1));
-
- createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
- LowerBound, UpperBound, Stride);
- Builder.CreateCall(SubFunction, SubfunctionParam);
- createCallParallelEnd();
+ *SubFnPtr = SubFn;
return IV;
}
Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll Fri Oct 3 14:10:13 2014
@@ -1,32 +1,8 @@
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-openmp < %s -S | FileCheck %s
-
-;/*
-; * =============================================================================
-; *
-; * Filename: 20120330-argument-use.c
-; *
-; * Description: Polly OpenMP test case
-; *
-; * Test if the OpenMP subfunction uses the argument copy in
-; * the OpenMP struct not the original one only available in
-; * the original function.
-; *
-; * Run with -polly-codegen -enable-polly-openmp
-; *
-; * Author: Johannes Doerfert johannes at jdoerfert.de
-; *
-; * Created: 2012-03-30
-; * Modified: 2012-03-30
-; *
-; * =============================================================================
-; */
;
;void f(int * restrict A, int * restrict B, int n) {
-; int i;
-;
-; for (i = 0; i < n; i++) {
+; for (int i = 0; i < n; i++)
; A[i] = B[i] * 2;
-; }
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -58,11 +34,11 @@ for.end:
ret void
}
-; CHECK: %omp.userContext1 = bitcast i8* %omp.userContext to { i32, i32*, i32* }*
-; CHECK: %0 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 0
+; CHECK: %polly.par.userContext[[NO:[0-9]*]] = bitcast i8* %polly.par.userContext to { i32, i32*, i32* }*
+; CHECK: %0 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 0
; CHECK: %1 = load i32* %0
-; CHECK: %2 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 1
+; CHECK: %2 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 1
; CHECK: %3 = load i32** %2
-; CHECK: %4 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 2
+; CHECK: %4 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 2
; CHECK: %5 = load i32** %4
Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll Fri Oct 3 14:10:13 2014
@@ -53,4 +53,4 @@ end:
; CLOOG: Stmt_for_end(c2);
; CLOOG: }
-; CHECK: @f.omp_subfn
+; CHECK: @f.polly.subfn
Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll Fri Oct 3 14:10:13 2014
@@ -31,4 +31,4 @@ for.end:
ret void
}
-; CHECK: %omp.userContext = alloca { float }
+; CHECK: %polly.par.userContext = alloca { float }
Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll Fri Oct 3 14:10:13 2014
@@ -32,4 +32,4 @@ for.end:
ret void
}
-; CHECK: %omp.userContext = alloca { float }
+; CHECK: %polly.par.userContext = alloca { float }
Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll Fri Oct 3 14:10:13 2014
@@ -49,9 +49,8 @@ entry:
call void @foo()
ret i32 0
}
-; CHECK: getelementptr inbounds { [10 x float]* }* %omp.userContext, i32 0, i32 0
-; CHECK: store [10 x float]* %A, [10 x float]** %0
-; CHECK: %omp_data = bitcast { [10 x float]* }* %omp.userContext to i8*
-; CHECK: inbounds { [10 x float]* }* %omp.userContext1, i32 0, i32 0
+; CHECK: %[[V:[._a-zA-Z0-9]+]] = getelementptr inbounds { [10 x float]* }* %polly.par.userContext, i32 0, i32 0
+; CHECK: store [10 x float]* %A, [10 x float]** %[[V]]
+; CHECK: inbounds { [10 x float]* }* %polly.par.userContext{{[0-9]*}}, i32 0, i32 0
; CHECK: load [10 x float]**
Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll Fri Oct 3 14:10:13 2014
@@ -1,8 +1,5 @@
; RUN: opt %loadPolly -polly-codegen < %s -enable-polly-openmp -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-
+;
; This test case implements the following code:
;
; for (i = 0; i < 1024; i++)
@@ -10,6 +7,7 @@ target triple = "x86_64-unknown-linux-gn
;
; The problem is that 'param' is not references in any subscript of loop
; bound, but it must still be forwarded to the OpenMP subfunction.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define void @foo(double %param, [1024 x double]* %A) {
entry:
@@ -35,4 +33,4 @@ for.end:
ret void
}
-; CHECK: omp_subfn
+; CHECK: @foo.polly.subfn
Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll Fri Oct 3 14:10:13 2014
@@ -78,15 +78,15 @@ entry:
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
-; CHECK: %omp.userContext = alloca { i32 }
-; CHECK: getelementptr inbounds { i32 }* %omp.userContext, i32 0, i32 0
-; CHECK: store i32 %polly.indvar, i32* %0
-; CHECK: %omp_data = bitcast { i32 }* %omp.userContext to i8*
-; CHECK: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.omp_subfn, i8* %omp_data, i32 0, i32 0, i32 10, i32 1)
-; CHECK: call void @loop_openmp.omp_subfn(i8* %omp_data)
+; CHECK: %polly.par.userContext = alloca { i32 }
+; CHECK: %[[NO:[._a-zA-Z0-9]*]] = getelementptr inbounds { i32 }* %polly.par.userContext, i32 0, i32 0
+; CHECK: store i32 %polly.indvar, i32* %[[NO]]
+; CHECK: %[[DATA:[._a-zA-Z0-9]*]] = bitcast { i32 }* %polly.par.userContext to i8*
+; CHECK: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.polly.subfn, i8* %[[DATA]], i32 0, i32 0, i32 10, i32 1)
+; CHECK: call void @loop_openmp.polly.subfn(i8* %[[DATA]])
; CHECK: call void @GOMP_parallel_end()
; Verify the new subfunction is annotated such that SCoP detection will skip it.
-; CHECK: @loop_openmp.omp_subfn({{.*}}) [[ATTR:#[0-9]+]]
+; CHECK: @loop_openmp.polly.subfn({{.*}}) [[ATTR:#[0-9]+]]
; CHECK: attributes [[ATTR]] = {{{[^\}]*}}polly.skip.fn{{[^\}]*}}}
Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll Fri Oct 3 14:10:13 2014
@@ -100,6 +100,6 @@ entry:
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
-; CHECK: %omp.userContext = alloca {}
-; CHECK: %omp.userContext1 = alloca { i32 }
+; CHECK-DAG: %polly.par.userContext{{[0-9]*}} = alloca {}
+; CHECK-DAG: %polly.par.userContext{{[0-9]*}} = alloca { i32 }
More information about the llvm-commits
mailing list