[polly] r219003 - [Refactor] Generalize parallel code generation

Johannes Doerfert doerfert at cs.uni-saarland.de
Fri Oct 3 12:10:13 PDT 2014


Author: jdoerfert
Date: Fri Oct  3 14:10:13 2014
New Revision: 219003

URL: http://llvm.org/viewvc/llvm-project?rev=219003&view=rev
Log:
[Refactor] Generalize parallel code generation

  + Generalized function names and comments
    + Removed OpenMP (omp) from the names and comments
    + Use common names (non OpenMP specific) for runtime library call creation
      methods
  + Commented the parallel code generator and all its member functions
  + Refactored some values and methods

Differential Revision: http://reviews.llvm.org/D4990


Modified:
    polly/trunk/include/polly/CodeGen/LoopGenerators.h
    polly/trunk/lib/CodeGen/CodeGeneration.cpp
    polly/trunk/lib/CodeGen/LoopGenerators.cpp
    polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll
    polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll
    polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll
    polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll
    polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll
    polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll
    polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll
    polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll

Modified: polly/trunk/include/polly/CodeGen/LoopGenerators.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/LoopGenerators.h?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/LoopGenerators.h (original)
+++ polly/trunk/include/polly/CodeGen/LoopGenerators.h Fri Oct  3 14:10:13 2014
@@ -13,10 +13,11 @@
 //===----------------------------------------------------------------------===//
 #ifndef POLLY_LOOP_GENERATORS_H
 #define POLLY_LOOP_GENERATORS_H
+
 #include "polly/CodeGen/IRBuilder.h"
-#include "llvm/ADT/SetVector.h"
 
-#include <map>
+#include "llvm/IR/ValueMap.h"
+#include "llvm/ADT/SetVector.h"
 
 namespace llvm {
 class Value;
@@ -54,73 +55,160 @@ Value *createLoop(Value *LowerBound, Val
                   ScopAnnotator *Annotator = NULL, bool Parallel = false,
                   bool UseGuard = true);
 
-class OMPGenerator {
+/// @brief The ParallelLoopGenerator allows creating parallelized loops
+///
+/// To parallelize a loop, we perform the following steps:
+///   o  Generate a subfunction which will hold the loop body.
+///   o  Create a struct to hold all outer values needed in the loop body.
+///   o  Create calls to a runtime library to achieve the actual parallelism.
+///      These calls will spawn and join threads, define how the work (here the
+///      iterations) is distributed between them and make sure each has access
+///      to the struct holding all needed values.
+///
+/// At the moment we support only one parallel runtime, OpenMP.
+///
+/// If we parallelize the outer loop of the following loop nest,
+///
+///   S0;
+///   for (int i = 0; i < N; i++)
+///     for (int j = 0; j < M; j++)
+///       S1(i, j);
+///   S2;
+///
+/// we will generate the following code (with different runtime function names):
+///
+///   S0;
+///   auto *values = storeValuesIntoStruct();
+///   // Execute subfunction with multiple threads
+///   spawn_threads(subfunction, values);
+///   join_threads();
+///   S2;
+///
+///   // This function is executed in parallel by different threads
+///   void subfunction(values) {
+///     while (auto *WorkItem = getWorkItem()) {
+///       int LB = WorkItem.begin();
+///       int UB = WorkItem.end();
+///       for (int i = LB; i < UB; i++)
+///         for (int j = 0; j < M; j++)
+///           S1(i, j);
+///     }
+///     cleanup_thread();
+///   }
+class ParallelLoopGenerator {
 public:
-  typedef std::map<Value *, Value *> ValueToValueMapTy;
-
-  OMPGenerator(PollyIRBuilder &Builder, Pass *P) : Builder(Builder), P(P) {}
+  using ValueToValueMapTy = llvm::ValueMap<Value *, Value *>;
 
-  /// @brief Create an OpenMP parallel loop.
-  ///
-  ///
-  /// @param LowerBound  The starting value of the induction variable.
-  /// @param UpperBound  The upper bound of the induction variable.
-  /// @param Stride      The value by which the induction variable is
-  ///                    incremented.
-  ///
-  /// @param UsedValues  A set of LLVM-IR Values that should be available to
-  ///                    the new loop body.
-  /// @param VMap        This map is filled by createParallelLoop(). It
-  ///                    maps the values in UsedValues to Values through which
-  ///                    their content is available within the loop body.
-  /// @param LoopBody    A pointer to an iterator that is set to point to the
-  ///                    body of the created loop. It should be used to insert
-  ///                    instructions that form the actual loop body.
-  ///
-  /// @return Value*     The newly created induction variable for this loop.
-  Value *createParallelLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
-                            SetVector<Value *> &UsedValues,
-                            ValueToValueMapTy &VMap,
+  /// @brief Create a parallel loop generator for the current function.
+  ParallelLoopGenerator(PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
+                        DominatorTree &DT, const DataLayout &DL)
+      : Builder(Builder), P(P), LI(LI), DT(DT), DL(DL),
+        LongType(
+            Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
+        M(Builder.GetInsertBlock()->getParent()->getParent()) {}
+
+  /// @brief Create a parallel loop
+  ///
+  ///
+  /// @param LB        The lower bound for the loop we parallelize.
+  /// @param UB        The upper bound for the loop we parallelize.
+  /// @param Stride    The stride of the loop we parallelize.
+  /// @param Values    A set of LLVM-IR Values that should be available in
+  ///                  the new loop body.
+  /// @param VMap      A map to allow outside access to the new versions of
+  ///                  the values in @p Values.
+  /// @param LoopBody  A pointer to an iterator that is set to point to the
+  ///                  body of the created loop. It should be used to insert
+  ///                  instructions that form the actual loop body.
+  ///
+  /// @return The newly created induction variable for this loop.
+  Value *createParallelLoop(Value *LB, Value *UB, Value *Stride,
+                            SetVector<Value *> &Values, ValueToValueMapTy &VMap,
                             BasicBlock::iterator *LoopBody);
 
 private:
+  /// @brief The IR builder we use to create instructions.
   PollyIRBuilder &Builder;
+
+  /// @brief A pass pointer to update analysis information.
   Pass *P;
 
-  IntegerType *getIntPtrTy();
-  Module *getModule();
+  /// @brief The loop info of the current function we need to update.
+  LoopInfo &LI;
+
+  /// @brief The dominance tree of the current function we need to update.
+  DominatorTree &DT;
+
+  /// @brief The target layout to get the right size for types.
+  const DataLayout &DL;
+
+  /// @brief The type of a "long" on this hardware used for backend calls.
+  Type *LongType;
 
-  void createCallParallelLoopStart(Value *SubFunction, Value *SubfunctionParam,
-                                   Value *NumberOfThreads, Value *LowerBound,
-                                   Value *UpperBound, Value *Stride);
-  Value *createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr);
-  void createCallParallelEnd();
-  void createCallLoopEndNowait();
-
-  Value *loadValuesIntoStruct(SetVector<Value *> &Values);
-  void extractValuesFromStruct(SetVector<Value *> OldValues, Value *Struct,
-                               ValueToValueMapTy &Map);
-
-  /// @brief Create the OpenMP subfunction.
-  ///
-  /// @param Stride       The value by which the induction variable is
-  ///                     incremented.
-  /// @param Struct       The structure that is used to make Values available to
-  ///                     the loop body.
-  /// @param UsedValues   A set of LLVM-IR Values that should be available to
-  ///                     the new loop body.
-  /// @param VMap         This map that is filled by createSubfunction(). It
-  ///                     maps the values in UsedValues to Values through which
-  ///                     their content is available within the loop body.
-  /// @param SubFunction  The newly created SubFunction is returned here.
-  ///
-  /// @return Value*      The newly created induction variable.
-  Value *createSubfunction(Value *Stride, Value *Struct,
-                           SetVector<Value *> UsedValues,
-                           ValueToValueMapTy &VMap, Function **SubFunction);
+  /// @brief The current module
+  Module *M;
 
-  /// @brief Create the definition of the OpenMP subfunction.
-  Function *createSubfunctionDefinition();
+  /// @brief Create a runtime library call to spawn the worker threads.
+  ///
+  /// @param SubFn      The subfunction which holds the loop body.
+  /// @param SubFnParam The parameter for the subfunction (basically the struct
+  ///                   filled with the outside values).
+  /// @param LB         The lower bound for the loop we parallelize.
+  /// @param UB         The upper bound for the loop we parallelize.
+  /// @param Stride     The stride of the loop we parallelize.
+  void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
+                              Value *UB, Value *Stride);
+
+  /// @brief Create a runtime library call to join the worker threads.
+  void createCallJoinThreads();
+
+  /// @brief Create a runtime library call to get the next work item.
+  ///
+  /// @param LBPtr A pointer value to store the work item begin in.
+  /// @param UBPtr A pointer value to store the work item end in.
+  ///
+  /// @returns A true value if the work item is not empty.
+  Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr);
+
+  /// @brief Create a runtime library call to allow cleanup of the thread.
+  ///
+  /// @note This function is called right before the thread will exit the
+  ///       subfunction and only if the runtime system depends on it.
+  void createCallCleanupThread();
+
+  /// @brief Create a struct for all @p Values and store them in there.
+  ///
+  /// @param Values The values which should be stored in the struct.
+  ///
+  /// @return The created struct.
+  Value *storeValuesIntoStruct(SetVector<Value *> &Values);
+
+  /// @brief Extract all values from the @p Struct and construct the mapping.
+  ///
+  /// @param Values The values which were stored in the struct.
+  /// @param Struct The struct holding all the values in @p Values.
+  /// @param VMap   A map to associate every element of @p Values with the
+  ///               new llvm value loaded from the @p Struct.
+  void extractValuesFromStruct(SetVector<Value *> Values, Value *Struct,
+                               ValueToValueMapTy &VMap);
+
+  /// @brief Create the definition of the parallel subfunction.
+  Function *createSubFnDefinition();
+
+  /// @brief Create the parallel subfunction.
+  ///
+  /// @param Stride     The induction variable increment.
+  /// @param Struct     A struct holding all values in @p UsedValues.
+  /// @param UsedValues A set of LLVM-IR Values that should be available in
+  ///                   the new loop body.
+  /// @param VMap       A map to allow outside access to the new versions of
+  ///                   the values in @p UsedValues.
+  /// @param SubFn      The newly created subfunction is returned here.
+  ///
+  /// @return The newly created induction variable.
+  Value *createSubFn(Value *Stride, Value *Struct,
+                     SetVector<Value *> UsedValues, ValueToValueMapTy &VMap,
+                     Function **SubFn);
 };
 } // end namespace polly
 #endif

Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Fri Oct  3 14:10:13 2014
@@ -246,6 +246,7 @@ private:
   LoopInfo &LI;
   ScalarEvolution &SE;
   DominatorTree &DT;
+  const DataLayout &DL;
 
   // The Builder specifies the current location to code generate at.
   PollyIRBuilder &Builder;
@@ -316,7 +317,7 @@ private:
   /// @brief Update ClastVars and ValueMap according to a value map.
   ///
   /// @param VMap A map from old to new values.
-  void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap);
+  void updateWithValueMap(ParallelLoopGenerator::ValueToValueMapTy &VMap);
 
   /// @brief Create an OpenMP parallel for loop.
   ///
@@ -579,8 +580,8 @@ SetVector<Value *> ClastStmtCodeGen::get
   return Values;
 }
 
-void
-ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap) {
+void ClastStmtCodeGen::updateWithValueMap(
+    ParallelLoopGenerator::ValueToValueMapTy &VMap) {
   std::set<Value *> Inserted;
 
   for (const auto &I : ClastVars) {
@@ -611,8 +612,8 @@ void ClastStmtCodeGen::codegenForOpenMP(
   BasicBlock::iterator LoopBody;
   IntegerType *IntPtrTy = getIntPtrTy();
   SetVector<Value *> Values;
-  OMPGenerator::ValueToValueMapTy VMap;
-  OMPGenerator OMPGen(Builder, P);
+  ParallelLoopGenerator::ValueToValueMapTy VMap;
+  ParallelLoopGenerator OMPGen(Builder, P, LI, DT, DL);
 
   Stride = Builder.getInt(APInt_from_MPZ(For->stride));
   Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy);
@@ -1025,7 +1026,8 @@ void ClastStmtCodeGen::codegen(const cla
 ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, PollyIRBuilder &B, Pass *P)
     : S(scop), P(P), LI(P->getAnalysis<LoopInfo>()),
       SE(P->getAnalysis<ScalarEvolution>()),
-      DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()), Builder(B),
+      DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
+      DL(P->getAnalysis<DataLayoutPass>().getDataLayout()), Builder(B),
       ExpGen(Builder, ClastVars) {}
 
 namespace {
@@ -1074,9 +1076,11 @@ public:
     AU.addRequired<ScopDetection>();
     AU.addRequired<ScopInfo>();
     AU.addRequired<DataLayoutPass>();
+    AU.addRequired<DataLayoutPass>();
     AU.addRequired<LoopInfo>();
 
     AU.addPreserved<CloogInfo>();
+    AU.addPreserved<DataLayoutPass>();
     AU.addPreserved<Dependences>();
     AU.addPreserved<LoopInfo>();
     AU.addPreserved<DominatorTreeWrapperPass>();
@@ -1103,6 +1107,7 @@ INITIALIZE_PASS_DEPENDENCY(CloogInfo);
 INITIALIZE_PASS_DEPENDENCY(Dependences);
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
+INITIALIZE_PASS_DEPENDENCY(DataLayoutPass);
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution);
 INITIALIZE_PASS_DEPENDENCY(ScopDetection);
 INITIALIZE_PASS_DEPENDENCY(DataLayoutPass);

Modified: polly/trunk/lib/CodeGen/LoopGenerators.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/LoopGenerators.cpp?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/LoopGenerators.cpp (original)
+++ polly/trunk/lib/CodeGen/LoopGenerators.cpp Fri Oct  3 14:10:13 2014
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains functions to create scalar and OpenMP parallel loops
-// as LLVM-IR.
+// This file contains functions to create scalar and parallel loops as LLVM-IR.
 //
 //===----------------------------------------------------------------------===//
 
@@ -138,61 +137,84 @@ Value *polly::createLoop(Value *LB, Valu
   return IV;
 }
 
-void OMPGenerator::createCallParallelLoopStart(
-    Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads,
-    Value *LowerBound, Value *UpperBound, Value *Stride) {
-  Module *M = getModule();
-  const char *Name = "GOMP_parallel_loop_runtime_start";
+Value *ParallelLoopGenerator::createParallelLoop(
+    Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
+    ValueToValueMapTy &Map, BasicBlock::iterator *LoopBody) {
+  Value *Struct, *IV, *SubFnParam;
+  Function *SubFn;
+
+  Struct = storeValuesIntoStruct(UsedValues);
+
+  BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
+  IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
+  *LoopBody = Builder.GetInsertPoint();
+  Builder.SetInsertPoint(BeforeLoop);
+
+  SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
+                                     "polly.par.userContext");
+
+  // Add one as the upper bound provided by openmp is a < comparison
+  // whereas the codegenForSequential function creates a <= comparison.
+  UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
+
+  // Tell the runtime we start a parallel loop
+  createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
+  Builder.CreateCall(SubFn, SubFnParam);
+  createCallJoinThreads();
+
+  return IV;
+}
+
+void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
+                                                   Value *SubFnParam, Value *LB,
+                                                   Value *UB, Value *Stride) {
+  const std::string Name = "GOMP_parallel_loop_runtime_start";
+
   Function *F = M->getFunction(Name);
 
   // If F is not available, declare it.
   if (!F) {
-    Type *LongTy = getIntPtrTy();
     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
 
     Type *Params[] = {PointerType::getUnqual(FunctionType::get(
                           Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
-                      Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy,
-                      LongTy, LongTy};
+                      Builder.getInt8PtrTy(), LongType, LongType, LongType,
+                      LongType};
 
     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
     F = Function::Create(Ty, Linkage, Name, M);
   }
 
-  Value *Args[] = {SubFunction, SubfunctionParam, NumberOfThreads,
-                   LowerBound,  UpperBound,       Stride};
+  Value *NumberOfThreads = ConstantInt::get(LongType, 0);
+  Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};
 
   Builder.CreateCall(F, Args);
 }
 
-Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
-                                        Value *UpperBoundPtr) {
-  Module *M = getModule();
-  const char *Name = "GOMP_loop_runtime_next";
+Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
+                                                    Value *UBPtr) {
+  const std::string Name = "GOMP_loop_runtime_next";
+
   Function *F = M->getFunction(Name);
 
   // If F is not available, declare it.
   if (!F) {
-    Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-
-    Type *Params[] = {LongPtrTy, LongPtrTy};
-
+    Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
     F = Function::Create(Ty, Linkage, Name, M);
   }
 
-  Value *Args[] = {LowerBoundPtr, UpperBoundPtr};
-
+  Value *Args[] = {LBPtr, UBPtr};
   Value *Return = Builder.CreateCall(F, Args);
   Return = Builder.CreateICmpNE(
       Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
   return Return;
 }
 
-void OMPGenerator::createCallParallelEnd() {
-  const char *Name = "GOMP_parallel_end";
-  Module *M = getModule();
+void ParallelLoopGenerator::createCallJoinThreads() {
+  const std::string Name = "GOMP_parallel_end";
+
   Function *F = M->getFunction(Name);
 
   // If F is not available, declare it.
@@ -206,9 +228,9 @@ void OMPGenerator::createCallParallelEnd
   Builder.CreateCall(F);
 }
 
-void OMPGenerator::createCallLoopEndNowait() {
-  const char *Name = "GOMP_loop_end_nowait";
-  Module *M = getModule();
+void ParallelLoopGenerator::createCallCleanupThread() {
+  const std::string Name = "GOMP_loop_end_nowait";
+
   Function *F = M->getFunction(Name);
 
   // If F is not available, declare it.
@@ -222,39 +244,32 @@ void OMPGenerator::createCallLoopEndNowa
   Builder.CreateCall(F);
 }
 
-IntegerType *OMPGenerator::getIntPtrTy() {
-  return P->getAnalysis<DataLayoutPass>().getDataLayout().getIntPtrType(
-      Builder.getContext());
-}
-
-Module *OMPGenerator::getModule() {
-  return Builder.GetInsertBlock()->getParent()->getParent();
-}
-
-Function *OMPGenerator::createSubfunctionDefinition() {
-  Module *M = getModule();
+Function *ParallelLoopGenerator::createSubFnDefinition() {
   Function *F = Builder.GetInsertBlock()->getParent();
   std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
-  Function *FN = Function::Create(FT, Function::InternalLinkage,
-                                  F->getName() + ".omp_subfn", M);
+  Function *SubFn = Function::Create(FT, Function::InternalLinkage,
+                                     F->getName() + ".polly.subfn", M);
+
   // Do not run any polly pass on the new function.
-  FN->addFnAttr(PollySkipFnAttr);
+  SubFn->addFnAttr(PollySkipFnAttr);
 
-  Function::arg_iterator AI = FN->arg_begin();
-  AI->setName("omp.userContext");
+  Function::arg_iterator AI = SubFn->arg_begin();
+  AI->setName("polly.par.userContext");
 
-  return FN;
+  return SubFn;
 }
 
-Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) {
-  std::vector<Type *> Members;
+Value *
+ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
+  SmallVector<Type *, 8> Members;
 
   for (Value *V : Values)
     Members.push_back(V->getType());
 
   StructType *Ty = StructType::get(Builder.getContext(), Members);
-  Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext");
+  Value *Struct =
+      new AllocaInst(Ty, 0, "polly.par.userContext", Builder.GetInsertPoint());
 
   for (unsigned i = 0; i < Values.size(); i++) {
     Value *Address = Builder.CreateStructGEP(Struct, i);
@@ -264,121 +279,79 @@ Value *OMPGenerator::loadValuesIntoStruc
   return Struct;
 }
 
-void OMPGenerator::extractValuesFromStruct(SetVector<Value *> OldValues,
-                                           Value *Struct,
-                                           ValueToValueMapTy &Map) {
+void ParallelLoopGenerator::extractValuesFromStruct(
+    SetVector<Value *> OldValues, Value *Struct, ValueToValueMapTy &Map) {
   for (unsigned i = 0; i < OldValues.size(); i++) {
     Value *Address = Builder.CreateStructGEP(Struct, i);
     Value *NewValue = Builder.CreateLoad(Address);
-    Map.insert(std::make_pair(OldValues[i], NewValue));
+    Map[OldValues[i]] = NewValue;
   }
 }
 
-Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
-                                       SetVector<Value *> Data,
-                                       ValueToValueMapTy &Map,
-                                       Function **SubFunction) {
-  Function *FN = createSubfunctionDefinition();
-
-  BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
-      *AfterBB;
-  Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule,
-      *LowerBound, *UpperBound, *IV;
-  Type *IntPtrTy = getIntPtrTy();
-  LLVMContext &Context = FN->getContext();
+Value *ParallelLoopGenerator::createSubFn(Value *Stride, Value *StructData,
+                                          SetVector<Value *> Data,
+                                          ValueToValueMapTy &Map,
+                                          Function **SubFnPtr) {
+  BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
+  Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
+  Function *SubFn = createSubFnDefinition();
+  LLVMContext &Context = SubFn->getContext();
 
   // Store the previous basic block.
   PrevBB = Builder.GetInsertBlock();
 
   // Create basic blocks.
-  HeaderBB = BasicBlock::Create(Context, "omp.setup", FN);
-  ExitBB = BasicBlock::Create(Context, "omp.exit", FN);
-  CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN);
-  LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN);
+  HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
+  ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
+  CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
+  PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
 
-  DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   DT.addNewBlock(HeaderBB, PrevBB);
   DT.addNewBlock(ExitBB, HeaderBB);
   DT.addNewBlock(CheckNextBB, HeaderBB);
-  DT.addNewBlock(LoadIVBoundsBB, HeaderBB);
+  DT.addNewBlock(PreHeaderBB, HeaderBB);
 
   // Fill up basic block HeaderBB.
   Builder.SetInsertPoint(HeaderBB);
-  LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr");
-  UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr");
-  UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(),
-                                      "omp.userContext");
+  LBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.LBPtr");
+  UBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.UBPtr");
+  UserContext = Builder.CreateBitCast(SubFn->arg_begin(), StructData->getType(),
+                                      "polly.par.userContext");
 
   extractValuesFromStruct(Data, UserContext, Map);
   Builder.CreateBr(CheckNextBB);
 
   // Add code to check if another set of iterations will be executed.
   Builder.SetInsertPoint(CheckNextBB);
-  Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr);
+  Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
-                                        "omp.hasNextScheduleBlock");
-  Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB);
+                                        "polly.par.hasNextScheduleBlock");
+  Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
 
   // Add code to to load the iv bounds for this set of iterations.
-  Builder.SetInsertPoint(LoadIVBoundsBB);
-  LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound");
-  UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound");
+  Builder.SetInsertPoint(PreHeaderBB);
+  LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
+  UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
 
   // Subtract one as the upper bound provided by openmp is a < comparison
   // whereas the codegenForSequential function creates a <= comparison.
-  UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1),
-                                 "omp.upperBoundAdjusted");
+  UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
+                         "polly.par.UBAdjusted");
 
   Builder.CreateBr(CheckNextBB);
   Builder.SetInsertPoint(--Builder.GetInsertPoint());
-  LoopInfo &LI = P->getAnalysis<LoopInfo>();
-  IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, LI, DT, AfterBB,
+  IV = createLoop(LB, UB, Stride, Builder, P, LI, DT, AfterBB,
                   ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);
 
   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
-  Builder.SetInsertPoint(AfterBB->begin());
 
-  // Add code to terminate this openmp subfunction.
+  // Add code to terminate this subfunction.
   Builder.SetInsertPoint(ExitBB);
-  createCallLoopEndNowait();
+  createCallCleanupThread();
   Builder.CreateRetVoid();
 
   Builder.SetInsertPoint(LoopBody);
-  *SubFunction = FN;
-
-  return IV;
-}
-
-Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
-                                        Value *Stride,
-                                        SetVector<Value *> &Values,
-                                        ValueToValueMapTy &Map,
-                                        BasicBlock::iterator *LoopBody) {
-  Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
-  Function *SubFunction;
-
-  Struct = loadValuesIntoStruct(Values);
-
-  BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
-  IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction);
-  *LoopBody = Builder.GetInsertPoint();
-  Builder.SetInsertPoint(PrevInsertPoint);
-
-  // Create call for GOMP_parallel_loop_runtime_start.
-  SubfunctionParam =
-      Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data");
-
-  NumberOfThreads = Builder.getInt32(0);
-
-  // Add one as the upper bound provided by openmp is a < comparison
-  // whereas the codegenForSequential function creates a <= comparison.
-  UpperBound =
-      Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1));
-
-  createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
-                              LowerBound, UpperBound, Stride);
-  Builder.CreateCall(SubFunction, SubfunctionParam);
-  createCallParallelEnd();
+  *SubFnPtr = SubFn;
 
   return IV;
 }

Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/20120330-argument-use.ll Fri Oct  3 14:10:13 2014
@@ -1,32 +1,8 @@
 ; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-openmp < %s -S | FileCheck %s
-
-;/*
-; * =============================================================================
-; *
-; *       Filename:  20120330-argument-use.c
-; *
-; *    Description:  Polly OpenMP test case
-; *
-; *                  Test if the OpenMP subfunction uses the argument copy in
-; *                  the OpenMP struct not the original one only available in
-; *                  the original function.
-; *
-; *                  Run with -polly-codegen -enable-polly-openmp
-; *
-; *         Author:  Johannes Doerfert johannes at jdoerfert.de
-; *
-; *        Created:  2012-03-30
-; *       Modified:  2012-03-30
-; *
-; * =============================================================================
-; */
 ;
 ;void f(int * restrict A, int * restrict B, int n) {
-;  int i;
-;
-;  for (i = 0; i < n; i++) {
+;  for (int i = 0; i < n; i++)
 ;    A[i] = B[i] * 2;
-;  }
 ;}
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -58,11 +34,11 @@ for.end:
   ret void
 }
 
-; CHECK: %omp.userContext1 = bitcast i8* %omp.userContext to { i32, i32*, i32* }*
-; CHECK: %0 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 0
+; CHECK: %polly.par.userContext[[NO:[0-9]*]] = bitcast i8* %polly.par.userContext to { i32, i32*, i32* }*
+; CHECK: %0 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 0
 ; CHECK: %1 = load i32* %0
-; CHECK: %2 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 1
+; CHECK: %2 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 1
 ; CHECK: %3 = load i32** %2
-; CHECK: %4 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 2
+; CHECK: %4 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 2
 ; CHECK: %5 = load i32** %4
 

Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/clastvar_after_parallel_loop.ll Fri Oct  3 14:10:13 2014
@@ -53,4 +53,4 @@ end:
 ; CLOOG:   Stmt_for_end(c2);
 ; CLOOG: }
 
-; CHECK: @f.omp_subfn
+; CHECK: @f.polly.subfn

Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_argument.ll Fri Oct  3 14:10:13 2014
@@ -31,4 +31,4 @@ for.end:
   ret void
 }
 
-; CHECK: %omp.userContext = alloca { float }
+; CHECK: %polly.par.userContext = alloca { float }

Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/copy_in_temporary.ll Fri Oct  3 14:10:13 2014
@@ -32,4 +32,4 @@ for.end:
   ret void
 }
 
-; CHECK: %omp.userContext = alloca { float }
+; CHECK: %polly.par.userContext = alloca { float }

Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/extract_memref.ll Fri Oct  3 14:10:13 2014
@@ -49,9 +49,8 @@ entry:
   call void @foo()
   ret i32 0
 }
-; CHECK: getelementptr inbounds { [10 x float]* }* %omp.userContext, i32 0, i32 0
-; CHECK: store [10 x float]* %A, [10 x float]** %0
-; CHECK: %omp_data = bitcast { [10 x float]* }* %omp.userContext to i8*
-; CHECK: inbounds { [10 x float]* }* %omp.userContext1, i32 0, i32 0
+; CHECK: %[[V:[._a-zA-Z0-9]+]] = getelementptr inbounds { [10 x float]* }* %polly.par.userContext, i32 0, i32 0
+; CHECK: store [10 x float]* %A, [10 x float]** %[[V]]
+; CHECK: inbounds { [10 x float]* }* %polly.par.userContext{{[0-9]*}}, i32 0, i32 0
 ; CHECK: load [10 x float]**
 

Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/param_referenced_in_stmt.ll Fri Oct  3 14:10:13 2014
@@ -1,8 +1,5 @@
 ; RUN: opt %loadPolly -polly-codegen < %s -enable-polly-openmp -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-
+;
 ; This test case implements the following code:
 ;
 ; for (i = 0; i < 1024; i++)
@@ -10,6 +7,7 @@ target triple = "x86_64-unknown-linux-gn
 ;
 ; The problem is that 'param' is not referenced in any subscript or loop
 ; bound, but it must still be forwarded to the OpenMP subfunction.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 define void @foo(double %param, [1024 x double]* %A) {
 entry:
@@ -35,4 +33,4 @@ for.end:
   ret void
 }
 
-; CHECK: omp_subfn
+; CHECK: @foo.polly.subfn

Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/simple_nested_loop.ll Fri Oct  3 14:10:13 2014
@@ -78,15 +78,15 @@ entry:
 
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
-; CHECK: %omp.userContext = alloca { i32 }
-; CHECK: getelementptr inbounds { i32 }* %omp.userContext, i32 0, i32 0
-; CHECK: store i32 %polly.indvar, i32* %0
-; CHECK: %omp_data = bitcast { i32 }* %omp.userContext to i8*
-; CHECK: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.omp_subfn, i8* %omp_data, i32 0, i32 0, i32 10, i32 1)
-; CHECK: call void @loop_openmp.omp_subfn(i8* %omp_data)
+; CHECK: %polly.par.userContext = alloca { i32 }
+; CHECK: %[[NO:[._a-zA-Z0-9]*]] = getelementptr inbounds { i32 }* %polly.par.userContext, i32 0, i32 0
+; CHECK: store i32 %polly.indvar, i32* %[[NO]]
+; CHECK: %[[DATA:[._a-zA-Z0-9]*]] = bitcast { i32 }* %polly.par.userContext to i8*
+; CHECK: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.polly.subfn, i8* %[[DATA]], i32 0, i32 0, i32 10, i32 1)
+; CHECK: call void @loop_openmp.polly.subfn(i8* %[[DATA]])
 ; CHECK: call void @GOMP_parallel_end()
 
 ; Verify the new subfunction is annotated such that SCoP detection will skip it.
-; CHECK: @loop_openmp.omp_subfn({{.*}}) [[ATTR:#[0-9]+]]
+; CHECK: @loop_openmp.polly.subfn({{.*}}) [[ATTR:#[0-9]+]]
 ; CHECK: attributes [[ATTR]] = {{{[^\}]*}}polly.skip.fn{{[^\}]*}}}
 

Modified: polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll?rev=219003&r1=219002&r2=219003&view=diff
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll (original)
+++ polly/trunk/test/Cloog/CodeGen/OpenMP/structnames.ll Fri Oct  3 14:10:13 2014
@@ -100,6 +100,6 @@ entry:
 
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
-; CHECK: %omp.userContext = alloca {}
-; CHECK: %omp.userContext1 = alloca { i32 }
+; CHECK-DAG: %polly.par.userContext{{[0-9]*}} = alloca {}
+; CHECK-DAG: %polly.par.userContext{{[0-9]*}} = alloca { i32 }
 





More information about the llvm-commits mailing list