[polly] r237450 - Drop unused PTX generator file

Tobias Grosser tobias at grosser.es
Fri May 15 08:41:14 PDT 2015


Author: grosser
Date: Fri May 15 10:41:14 2015
New Revision: 237450

URL: http://llvm.org/viewvc/llvm-project?rev=237450&view=rev
Log:
Drop unused PTX generator file

This code has been part of Polly's GPGPU backend, which has been removed
together with the code generation backend. Development now continues in an
out-of-tree branch.

Removed:
    polly/trunk/include/polly/CodeGen/PTXGenerator.h
    polly/trunk/lib/CodeGen/PTXGenerator.cpp
Modified:
    polly/trunk/lib/CMakeLists.txt
    polly/trunk/lib/Makefile

Removed: polly/trunk/include/polly/CodeGen/PTXGenerator.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/PTXGenerator.h?rev=237449&view=auto
==============================================================================
--- polly/trunk/include/polly/CodeGen/PTXGenerator.h (original)
+++ polly/trunk/include/polly/CodeGen/PTXGenerator.h (removed)
@@ -1,184 +0,0 @@
-//===- PTXGenerator.h - IR helper to create GPGPU LLVM-IR -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains functions to create GPGPU parallel loops as LLVM-IR.
-//
-//===----------------------------------------------------------------------===//
-#ifndef POLLY_CODEGEN_PTXGENERATOR_H
-#define POLLY_CODEGEN_PTXGENERATOR_H
-
-#include "polly/Config/config.h"
-
-#ifdef GPU_CODEGEN
-#include "polly/CodeGen/IRBuilder.h"
-#include "llvm/ADT/SetVector.h"
-
-#include <map>
-
-namespace llvm {
-class Value;
-class Pass;
-class BasicBlock;
-}
-
-namespace polly {
-using namespace llvm;
-
-class PTXGenerator {
-public:
-  typedef std::map<Value *, Value *> ValueToValueMapTy;
-
-  PTXGenerator(PollyIRBuilder &Builder, Pass *P, const std::string &Triple);
-
-  /// @brief Create a GPGPU parallel loop.
-  ///
-  /// @param UsedValues   A set of LLVM-IR Values that should be available to
-  ///                     the new loop body.
-  /// @param OriginalIVS  The new values of the original induction variables.
-  /// @param VMap         This map is filled by createParallelLoop(). It
-  ///                     maps the values in UsedValues to Values through which
-  ///                     their content is available within the loop body.
-  /// @param LoopBody     A pointer to an iterator that is set to point to the
-  ///                     body of the created loop. It should be used to insert
-  ///                     instructions that form the actual loop body.
-  void startGeneration(SetVector<Value *> &UsedValues,
-                       SetVector<Value *> &OriginalIVS, ValueToValueMapTy &VMap,
-                       BasicBlock::iterator *LoopBody);
-
-  /// @brief Execute the post-operations to build a GPGPU parallel loop.
-  ///
-  void finishGeneration(Function *SubFunction);
-
-  /// @brief Set the parameters for launching PTX kernel.
-  ///
-  /// @param GridW    A value of the width of a GPU grid.
-  /// @param GridH    A value of the height of a GPU grid.
-  /// @param BlockW   A value of the width of a GPU block.
-  /// @param BlockH   A value of the height of a GPU block.
-  void setLaunchingParameters(int GridW, int GridH, int BlockW, int BlockH) {
-    GridWidth = GridW;
-    GridHeight = GridH;
-    BlockWidth = BlockW;
-    BlockHeight = BlockH;
-  }
-
-  /// @brief Set the size of the output array.
-  ///
-  /// This size is used to allocate memory on the device and the host.
-  ///
-  /// @param Bytes        Output array size in bytes.
-  void setOutputBytes(unsigned Bytes) { OutputBytes = Bytes; }
-
-private:
-  PollyIRBuilder &Builder;
-  Pass *P;
-
-  /// @brief The target triple of the device.
-  const std::string &GPUTriple;
-
-  ///@brief Parameters used for launching PTX kernel.
-  int GridWidth, GridHeight, BlockWidth, BlockHeight;
-
-  /// @brief Size of the output array in bytes.
-  unsigned OutputBytes;
-
-  /// @brief Polly's GPU data types.
-  StructType *ContextTy, *ModuleTy, *KernelTy, *DeviceTy, *DevDataTy, *EventTy;
-
-  void InitializeGPUDataTypes();
-  IntegerType *getInt64Type();           // i64
-  PointerType *getI8PtrType();           // char *
-  PointerType *getPtrI8PtrType();        // char **
-  PointerType *getFloatPtrType();        // float *
-  PointerType *getGPUContextPtrType();   // %struct.PollyGPUContextT *
-  PointerType *getGPUModulePtrType();    // %struct.PollyGPUModuleT *
-  PointerType *getGPUDevicePtrType();    // %struct.PollyGPUDeviceT *
-  PointerType *getPtrGPUDevicePtrType(); // %struct.PollyGPUDevicePtrT *
-  PointerType *getGPUFunctionPtrType();  // %struct.PollyGPUFunctionT *
-  PointerType *getGPUEventPtrType();     // %struct.PollyGPUEventT *
-
-  Module *getModule();
-
-  /// @brief Create the kernel string containing LLVM IR.
-  ///
-  /// @param SubFunction  A pointer to the device code function.
-  /// @return             A global string variable containing the LLVM IR codes
-  //                      of the SubFunction.
-  Value *createPTXKernelFunction(Function *SubFunction);
-
-  /// @brief Get the entry name of the device kernel function.
-  ///
-  /// @param SubFunction  A pointer to the device code function.
-  /// @return             A global string variable containing the entry name of
-  ///                     the SubFunction.
-  Value *getPTXKernelEntryName(Function *SubFunction);
-
-  void createCallInitDevice(Value *Context, Value *Device);
-  void createCallGetPTXModule(Value *Buffer, Value *Module);
-  void createCallGetPTXKernelEntry(Value *Entry, Value *Module, Value *Kernel);
-  void createCallAllocateMemoryForHostAndDevice(Value *HostData,
-                                                Value *DeviceData, Value *Size);
-  void createCallCopyFromHostToDevice(Value *DeviceData, Value *HostData,
-                                      Value *Size);
-  void createCallCopyFromDeviceToHost(Value *HostData, Value *DeviceData,
-                                      Value *Size);
-  void createCallSetKernelParameters(Value *Kernel, Value *BlockWidth,
-                                     Value *BlockHeight, Value *DeviceData);
-  void createCallLaunchKernel(Value *Kernel, Value *GridWidth,
-                              Value *GridHeight);
-  void createCallStartTimerByCudaEvent(Value *StartEvent, Value *StopEvent);
-  void createCallStopTimerByCudaEvent(Value *StartEvent, Value *StopEvent,
-                                      Value *Timer);
-  void createCallCleanupGPGPUResources(Value *HostData, Value *DeviceData,
-                                       Value *Module, Value *Context,
-                                       Value *Kernel);
-
-  /// @brief Create the CUDA subfunction.
-  ///
-  /// @param UsedValues   A set of LLVM-IR Values that should be available to
-  ///                     the new loop body.
-  /// @param VMap         This map that is filled by createSubfunction(). It
-  ///                     maps the values in UsedValues to Values through which
-  ///                     their content is available within the loop body.
-  /// @param OriginalIVS  The new values of the original induction variables.
-  /// @param SubFunction  The newly created SubFunction is returned here.
-  void createSubfunction(SetVector<Value *> &UsedValues,
-                         SetVector<Value *> &OriginalIVS,
-                         ValueToValueMapTy &VMap, Function **SubFunction);
-
-  /// @brief Create the definition of the CUDA subfunction.
-  ///
-  /// @param NumArgs      The number of parameters of this subfunction. This is
-  ///                     usually set to the number of memory accesses which
-  ///                     will be copied from host to device.
-  Function *createSubfunctionDefinition(int NumArgs);
-
-  /// @brief Get the Value of CUDA block width.
-  Value *getCUDABlockWidth();
-
-  /// @brief Get the Value of CUDA block height.
-  Value *getCUDABlockHeight();
-
-  /// @brief Get the Value of CUDA Gird width.
-  Value *getCUDAGridWidth();
-
-  /// @brief Get the Value of CUDA grid height.
-  Value *getCUDAGridHeight();
-
-  /// @brief Get the Value of the bytes of the output array.
-  Value *getOutputArraySizeInBytes();
-
-  /// @brief Erase the ptx-related subfunctions and declarations.
-  ///
-  /// @param SubFunction  A pointer to the device code function.
-  void eraseUnusedFunctions(Function *SubFunction);
-};
-} // end namespace polly
-#endif /* GPU_CODEGEN */
-#endif /* POLLY_CODEGEN_PTXGENERATOR_H */

Modified: polly/trunk/lib/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CMakeLists.txt?rev=237450&r1=237449&r2=237450&view=diff
==============================================================================
--- polly/trunk/lib/CMakeLists.txt (original)
+++ polly/trunk/lib/CMakeLists.txt Fri May 15 10:41:14 2015
@@ -13,8 +13,7 @@ set(ISL_CODEGEN_FILES
     CodeGen/CodeGeneration.cpp)
 
 if (GPU_CODEGEN)
-  set (GPGPU_CODEGEN_FILES
-       CodeGen/PTXGenerator.cpp)
+  set (GPGPU_CODEGEN_FILES)
 endif (GPU_CODEGEN)
 
 set (ISL_FILES

Removed: polly/trunk/lib/CodeGen/PTXGenerator.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PTXGenerator.cpp?rev=237449&view=auto
==============================================================================
--- polly/trunk/lib/CodeGen/PTXGenerator.cpp (original)
+++ polly/trunk/lib/CodeGen/PTXGenerator.cpp (removed)
@@ -1,711 +0,0 @@
-//===------ PTXGenerator.cpp -  IR helper to create loops -----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains functions to create GPU parallel codes as LLVM-IR.
-//
-//===----------------------------------------------------------------------===//
-
-#include "polly/CodeGen/PTXGenerator.h"
-
-#ifdef GPU_CODEGEN
-#include "polly/ScopDetection.h"
-#include "polly/ScopInfo.h"
-
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-
-using namespace llvm;
-using namespace polly;
-
-PTXGenerator::PTXGenerator(PollyIRBuilder &Builder, Pass *P,
-                           const std::string &Triple)
-    : Builder(Builder), P(P), GPUTriple(Triple), GridWidth(1), GridHeight(1),
-      BlockWidth(1), BlockHeight(1), OutputBytes(0) {
-  InitializeGPUDataTypes();
-}
-
-Module *PTXGenerator::getModule() {
-  return Builder.GetInsertBlock()->getParent()->getParent();
-}
-
-Function *PTXGenerator::createSubfunctionDefinition(int NumArgs) {
-  assert(NumArgs == 1 && "we support only one array access now.");
-
-  Module *M = getModule();
-  Function *F = Builder.GetInsertBlock()->getParent();
-  std::vector<Type *> Arguments;
-  for (int i = 0; i < NumArgs; i++)
-    Arguments.push_back(Builder.getInt8PtrTy());
-  FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
-  Function *FN = Function::Create(FT, Function::InternalLinkage,
-                                  F->getName() + "_ptx_subfn", M);
-  FN->setCallingConv(CallingConv::PTX_Kernel);
-
-  // Do not run any optimization pass on the new function.
-  P->getAnalysis<polly::ScopDetection>().markFunctionAsInvalid(FN);
-
-  for (Function::arg_iterator AI = FN->arg_begin(); AI != FN->arg_end(); ++AI)
-    AI->setName("ptx.Array");
-
-  return FN;
-}
-
-void PTXGenerator::createSubfunction(SetVector<Value *> &UsedValues,
-                                     SetVector<Value *> &OriginalIVS,
-                                     PTXGenerator::ValueToValueMapTy &VMap,
-                                     Function **SubFunction) {
-  Function *FN = createSubfunctionDefinition(UsedValues.size());
-  Module *M = getModule();
-  LLVMContext &Context = FN->getContext();
-  IntegerType *Ty = Builder.getInt64Ty();
-
-  // Store the previous basic block.
-  BasicBlock *PrevBB = Builder.GetInsertBlock();
-
-  // Create basic blocks.
-  BasicBlock *HeaderBB = BasicBlock::Create(Context, "ptx.setup", FN);
-  BasicBlock *ExitBB = BasicBlock::Create(Context, "ptx.exit", FN);
-  BasicBlock *BodyBB = BasicBlock::Create(Context, "ptx.loop_body", FN);
-
-  DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  DT.addNewBlock(HeaderBB, PrevBB);
-  DT.addNewBlock(ExitBB, HeaderBB);
-  DT.addNewBlock(BodyBB, HeaderBB);
-
-  Builder.SetInsertPoint(HeaderBB);
-
-  // Insert VMap items with maps of array base address on the host to base
-  // address on the device.
-  Function::arg_iterator AI = FN->arg_begin();
-  for (unsigned j = 0; j < UsedValues.size(); j++) {
-    Value *BaseAddr = UsedValues[j];
-    Type *ArrayTy = BaseAddr->getType();
-    Value *Param = Builder.CreateBitCast(AI, ArrayTy);
-    VMap.insert(std::make_pair(BaseAddr, Param));
-    AI++;
-  }
-
-  // FIXME: These intrinsics should be inserted on-demand. However, we insert
-  // them all currently for simplicity.
-  Function *GetNctaidX =
-      Intrinsic::getDeclaration(M, Intrinsic::ptx_read_nctaid_x);
-  Function *GetNctaidY =
-      Intrinsic::getDeclaration(M, Intrinsic::ptx_read_nctaid_y);
-  Function *GetCtaidX =
-      Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ctaid_x);
-  Function *GetCtaidY =
-      Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ctaid_y);
-  Function *GetNtidX = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ntid_x);
-  Function *GetNtidY = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ntid_y);
-  Function *GetTidX = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_tid_x);
-  Function *GetTidY = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_tid_y);
-
-  Value *GridWidth = Builder.CreateCall(GetNctaidX);
-  GridWidth = Builder.CreateIntCast(GridWidth, Ty, false);
-  Value *GridHeight = Builder.CreateCall(GetNctaidY);
-  GridHeight = Builder.CreateIntCast(GridHeight, Ty, false);
-  Value *BlockWidth = Builder.CreateCall(GetNtidX);
-  BlockWidth = Builder.CreateIntCast(BlockWidth, Ty, false);
-  Value *BlockHeight = Builder.CreateCall(GetNtidY);
-  BlockHeight = Builder.CreateIntCast(BlockHeight, Ty, false);
-  Value *BIDx = Builder.CreateCall(GetCtaidX);
-  BIDx = Builder.CreateIntCast(BIDx, Ty, false);
-  Value *BIDy = Builder.CreateCall(GetCtaidY);
-  BIDy = Builder.CreateIntCast(BIDy, Ty, false);
-  Value *TIDx = Builder.CreateCall(GetTidX);
-  TIDx = Builder.CreateIntCast(TIDx, Ty, false);
-  Value *TIDy = Builder.CreateCall(GetTidY);
-  TIDy = Builder.CreateIntCast(TIDy, Ty, false);
-
-  Builder.CreateBr(BodyBB);
-  Builder.SetInsertPoint(BodyBB);
-
-  unsigned NumDims = OriginalIVS.size();
-  std::vector<Value *> Substitutions;
-  Value *BlockID, *ThreadID;
-  switch (NumDims) {
-  case 1: {
-    Value *BlockSize =
-        Builder.CreateMul(BlockWidth, BlockHeight, "p_gpu_blocksize");
-    BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i");
-    BlockID = Builder.CreateAdd(BlockID, BIDx);
-    BlockID = Builder.CreateMul(BlockID, BlockSize);
-    ThreadID = Builder.CreateMul(TIDy, BlockWidth, "p_gpu_index_j");
-    ThreadID = Builder.CreateAdd(ThreadID, TIDx);
-    ThreadID = Builder.CreateAdd(ThreadID, BlockID);
-    Substitutions.push_back(ThreadID);
-    break;
-  }
-  case 2: {
-    BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i");
-    BlockID = Builder.CreateAdd(BlockID, BIDx);
-    Substitutions.push_back(BlockID);
-    ThreadID = Builder.CreateMul(TIDy, BlockWidth, "p_gpu_index_j");
-    ThreadID = Builder.CreateAdd(ThreadID, TIDx);
-    Substitutions.push_back(ThreadID);
-    break;
-  }
-  case 3: {
-    BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i");
-    BlockID = Builder.CreateAdd(BlockID, BIDx);
-    Substitutions.push_back(BlockID);
-    Substitutions.push_back(TIDy);
-    Substitutions.push_back(TIDx);
-    break;
-  }
-  case 4: {
-    Substitutions.push_back(BIDy);
-    Substitutions.push_back(BIDx);
-    Substitutions.push_back(TIDy);
-    Substitutions.push_back(TIDx);
-    break;
-  }
-  default:
-    assert(true &&
-           "We cannot transform parallel loops whose depth is larger than 4.");
-    return;
-  }
-
-  assert(OriginalIVS.size() == Substitutions.size() &&
-         "The size of IVS should be equal to the size of substitutions.");
-  for (unsigned i = 0; i < OriginalIVS.size(); ++i) {
-    VMap.insert(std::make_pair(OriginalIVS[i], Substitutions[i]));
-  }
-
-  Builder.CreateBr(ExitBB);
-  Builder.SetInsertPoint(--Builder.GetInsertPoint());
-  BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
-
-  // Add the termination of the ptx-device subfunction.
-  Builder.SetInsertPoint(ExitBB);
-  Builder.CreateRetVoid();
-
-  Builder.SetInsertPoint(LoopBody);
-  *SubFunction = FN;
-}
-
-void PTXGenerator::startGeneration(SetVector<Value *> &UsedValues,
-                                   SetVector<Value *> &OriginalIVS,
-                                   ValueToValueMapTy &VMap,
-                                   BasicBlock::iterator *LoopBody) {
-  Function *SubFunction;
-  BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
-  createSubfunction(UsedValues, OriginalIVS, VMap, &SubFunction);
-  *LoopBody = Builder.GetInsertPoint();
-  Builder.SetInsertPoint(PrevInsertPoint);
-}
-
-IntegerType *PTXGenerator::getInt64Type() { return Builder.getInt64Ty(); }
-
-PointerType *PTXGenerator::getI8PtrType() {
-  return PointerType::getUnqual(Builder.getInt8Ty());
-}
-
-PointerType *PTXGenerator::getPtrI8PtrType() {
-  return PointerType::getUnqual(getI8PtrType());
-}
-
-PointerType *PTXGenerator::getFloatPtrType() {
-  return llvm::Type::getFloatPtrTy(getModule()->getContext());
-}
-
-PointerType *PTXGenerator::getGPUContextPtrType() {
-  return PointerType::getUnqual(ContextTy);
-}
-
-PointerType *PTXGenerator::getGPUModulePtrType() {
-  return PointerType::getUnqual(ModuleTy);
-}
-
-PointerType *PTXGenerator::getGPUDevicePtrType() {
-  return PointerType::getUnqual(DeviceTy);
-}
-
-PointerType *PTXGenerator::getPtrGPUDevicePtrType() {
-  return PointerType::getUnqual(DevDataTy);
-}
-
-PointerType *PTXGenerator::getGPUFunctionPtrType() {
-  return PointerType::getUnqual(KernelTy);
-}
-
-PointerType *PTXGenerator::getGPUEventPtrType() {
-  return PointerType::getUnqual(EventTy);
-}
-
-void PTXGenerator::InitializeGPUDataTypes() {
-  LLVMContext &Context = getModule()->getContext();
-
-  ContextTy = StructType::create(Context, "struct.PollyGPUContextT");
-  ModuleTy = StructType::create(Context, "struct.PollyGPUModuleT");
-  KernelTy = StructType::create(Context, "struct.PollyGPUFunctionT");
-  DeviceTy = StructType::create(Context, "struct.PollyGPUDeviceT");
-  DevDataTy = StructType::create(Context, "struct.PollyGPUDevicePtrT");
-  EventTy = StructType::create(Context, "struct.PollyGPUEventT");
-}
-
-void PTXGenerator::createCallInitDevice(Value *Context, Value *Device) {
-  const char *Name = "polly_initDevice";
-  Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(PointerType::getUnqual(getGPUContextPtrType()));
-    Args.push_back(PointerType::getUnqual(getGPUDevicePtrType()));
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall2(F, Context, Device);
-}
-
-void PTXGenerator::createCallGetPTXModule(Value *Buffer, Value *Module) {
-  const char *Name = "polly_getPTXModule";
-  llvm::Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getI8PtrType());
-    Args.push_back(PointerType::getUnqual(getGPUModulePtrType()));
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall2(F, Buffer, Module);
-}
-
-void PTXGenerator::createCallGetPTXKernelEntry(Value *Entry, Value *Module,
-                                               Value *Kernel) {
-  const char *Name = "polly_getPTXKernelEntry";
-  llvm::Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getI8PtrType());
-    Args.push_back(getGPUModulePtrType());
-    Args.push_back(PointerType::getUnqual(getGPUFunctionPtrType()));
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall3(F, Entry, Module, Kernel);
-}
-
-void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData,
-                                                            Value *DeviceData,
-                                                            Value *Size) {
-  const char *Name = "polly_allocateMemoryForHostAndDevice";
-  Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getPtrI8PtrType());
-    Args.push_back(PointerType::getUnqual(getPtrGPUDevicePtrType()));
-    Args.push_back(getInt64Type());
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall3(F, HostData, DeviceData, Size);
-}
-
-void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData,
-                                                  Value *HostData,
-                                                  Value *Size) {
-  const char *Name = "polly_copyFromHostToDevice";
-  Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getPtrGPUDevicePtrType());
-    Args.push_back(getI8PtrType());
-    Args.push_back(getInt64Type());
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall3(F, DeviceData, HostData, Size);
-}
-
-void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData,
-                                                  Value *DeviceData,
-                                                  Value *Size) {
-  const char *Name = "polly_copyFromDeviceToHost";
-  Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getI8PtrType());
-    Args.push_back(getPtrGPUDevicePtrType());
-    Args.push_back(getInt64Type());
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall3(F, HostData, DeviceData, Size);
-}
-
-void PTXGenerator::createCallSetKernelParameters(Value *Kernel,
-                                                 Value *BlockWidth,
-                                                 Value *BlockHeight,
-                                                 Value *DeviceData) {
-  const char *Name = "polly_setKernelParameters";
-  Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getGPUFunctionPtrType());
-    Args.push_back(getInt64Type());
-    Args.push_back(getInt64Type());
-    Args.push_back(getPtrGPUDevicePtrType());
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall4(F, Kernel, BlockWidth, BlockHeight, DeviceData);
-}
-
-void PTXGenerator::createCallLaunchKernel(Value *Kernel, Value *GridWidth,
-                                          Value *GridHeight) {
-  const char *Name = "polly_launchKernel";
-  Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getGPUFunctionPtrType());
-    Args.push_back(getInt64Type());
-    Args.push_back(getInt64Type());
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall3(F, Kernel, GridWidth, GridHeight);
-}
-
-void PTXGenerator::createCallStartTimerByCudaEvent(Value *StartEvent,
-                                                   Value *StopEvent) {
-  const char *Name = "polly_startTimerByCudaEvent";
-  Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(PointerType::getUnqual(getGPUEventPtrType()));
-    Args.push_back(PointerType::getUnqual(getGPUEventPtrType()));
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall2(F, StartEvent, StopEvent);
-}
-
-void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent,
-                                                  Value *StopEvent,
-                                                  Value *Timer) {
-  const char *Name = "polly_stopTimerByCudaEvent";
-  Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getGPUEventPtrType());
-    Args.push_back(getGPUEventPtrType());
-    Args.push_back(getFloatPtrType());
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall3(F, StartEvent, StopEvent, Timer);
-}
-
-void PTXGenerator::createCallCleanupGPGPUResources(Value *HostData,
-                                                   Value *DeviceData,
-                                                   Value *Module,
-                                                   Value *Context,
-                                                   Value *Kernel) {
-  const char *Name = "polly_cleanupGPGPUResources";
-  llvm::Module *M = getModule();
-  Function *F = M->getFunction(Name);
-
-  // If F is not available, declare it.
-  if (!F) {
-    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
-    std::vector<Type *> Args;
-    Args.push_back(getI8PtrType());
-    Args.push_back(getPtrGPUDevicePtrType());
-    Args.push_back(getGPUModulePtrType());
-    Args.push_back(getGPUContextPtrType());
-    Args.push_back(getGPUFunctionPtrType());
-    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
-    F = Function::Create(Ty, Linkage, Name, M);
-  }
-
-  Builder.CreateCall5(F, HostData, DeviceData, Module, Context, Kernel);
-}
-
-Value *PTXGenerator::getCUDAGridWidth() {
-  return ConstantInt::get(getInt64Type(), GridWidth);
-}
-
-Value *PTXGenerator::getCUDAGridHeight() {
-  return ConstantInt::get(getInt64Type(), GridHeight);
-}
-
-Value *PTXGenerator::getCUDABlockWidth() {
-  return ConstantInt::get(getInt64Type(), BlockWidth);
-}
-
-Value *PTXGenerator::getCUDABlockHeight() {
-  return ConstantInt::get(getInt64Type(), BlockHeight);
-}
-
-Value *PTXGenerator::getOutputArraySizeInBytes() {
-  return ConstantInt::get(getInt64Type(), OutputBytes);
-}
-
-static Module *extractPTXFunctionsFromModule(const Module *M,
-                                             const StringRef &Triple) {
-  llvm::ValueToValueMapTy VMap;
-  Module *New = new Module("TempGPUModule", M->getContext());
-  New->setTargetTriple(Triple::normalize(Triple));
-
-  // Loop over the functions in the module, making external functions as before
-  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
-    if (!I->isDeclaration() &&
-        (I->getCallingConv() == CallingConv::PTX_Device ||
-         I->getCallingConv() == CallingConv::PTX_Kernel)) {
-      Function *NF =
-          Function::Create(cast<FunctionType>(I->getType()->getElementType()),
-                           I->getLinkage(), I->getName(), New);
-      NF->copyAttributesFrom(I);
-      VMap[I] = NF;
-
-      Function::arg_iterator DestI = NF->arg_begin();
-      for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
-           ++J) {
-        DestI->setName(J->getName());
-        VMap[J] = DestI++;
-      }
-      SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
-      CloneFunctionInto(NF, I, VMap, /*ModuleLevelChanges=*/true, Returns);
-    }
-  }
-
-  return New;
-}
-
-static bool createASMAsString(Module *New, const StringRef &Triple,
-                              const StringRef &MCPU, const StringRef &Features,
-                              std::string &ASM) {
-  llvm::Triple TheTriple(Triple::normalize(Triple));
-  std::string ErrMsg;
-  const Target *TheTarget =
-      TargetRegistry::lookupTarget(TheTriple.getTriple(), ErrMsg);
-  if (!TheTarget) {
-    errs() << ErrMsg << "\n";
-    return false;
-  }
-
-  TargetOptions Options;
-  std::unique_ptr<TargetMachine> target(TheTarget->createTargetMachine(
-      TheTriple.getTriple(), MCPU, Features, Options));
-  assert(target.get() && "Could not allocate target machine!");
-  TargetMachine &Target = *target.get();
-
-  // Build up all of the passes that we want to do to the module.
-  llvm::legacy::PassManager PM;
-
-  PM.add(new TargetLibraryInfoWrapperPass(TheTriple));
-  PM.add(createTargetTransformInfoWrapperPass(Target.getTargetIRAnalysis()));
-
-  {
-    SmallString<100> ASMSmall;
-    raw_svector_ostream NameROSSmall(ASMSmall);
-
-    // Ask the target to add backend passes as necessary.
-    int UseVerifier = true;
-    if (Target.addPassesToEmitFile(
-            PM, NameROSSmall, TargetMachine::CGFT_AssemblyFile, UseVerifier)) {
-      errs() << "The target does not support generation of this file type!\n";
-      return false;
-    }
-
-    ASM = ASMSmall.c_str();
-    PM.run(*New);
-  }
-
-  return true;
-}
-
-Value *PTXGenerator::createPTXKernelFunction(Function *SubFunction) {
-  Module *M = getModule();
-  Module *GPUModule = extractPTXFunctionsFromModule(M, GPUTriple);
-  std::string LLVMKernelStr;
-  if (!createASMAsString(GPUModule, GPUTriple, "sm_20" /*MCPU*/,
-                         "" /*Features*/, LLVMKernelStr)) {
-    errs() << "Generate ptx string failed!\n";
-    return NULL;
-  }
-
-  Value *LLVMKernel =
-      Builder.CreateGlobalStringPtr(LLVMKernelStr, "llvm_kernel");
-
-  delete GPUModule;
-  return LLVMKernel;
-}
-
-Value *PTXGenerator::getPTXKernelEntryName(Function *SubFunction) {
-  StringRef Entry = SubFunction->getName();
-  return Builder.CreateGlobalStringPtr(Entry, "ptx_entry");
-}
-
-void PTXGenerator::eraseUnusedFunctions(Function *SubFunction) {
-  Module *M = getModule();
-  SubFunction->eraseFromParent();
-
-  if (Function *FuncPTXReadNCtaidX = M->getFunction("llvm.ptx.read.nctaid.x")) {
-    FuncPTXReadNCtaidX->eraseFromParent();
-  }
-
-  if (Function *FuncPTXReadNCtaidY = M->getFunction("llvm.ptx.read.nctaid.y")) {
-    FuncPTXReadNCtaidY->eraseFromParent();
-  }
-
-  if (Function *FuncPTXReadCtaidX = M->getFunction("llvm.ptx.read.ctaid.x")) {
-    FuncPTXReadCtaidX->eraseFromParent();
-  }
-
-  if (Function *FuncPTXReadCtaidY = M->getFunction("llvm.ptx.read.ctaid.y")) {
-    FuncPTXReadCtaidY->eraseFromParent();
-  }
-
-  if (Function *FuncPTXReadNTidX = M->getFunction("llvm.ptx.read.ntid.x")) {
-    FuncPTXReadNTidX->eraseFromParent();
-  }
-
-  if (Function *FuncPTXReadNTidY = M->getFunction("llvm.ptx.read.ntid.y")) {
-    FuncPTXReadNTidY->eraseFromParent();
-  }
-
-  if (Function *FuncPTXReadTidX = M->getFunction("llvm.ptx.read.tid.x")) {
-    FuncPTXReadTidX->eraseFromParent();
-  }
-
-  if (Function *FuncPTXReadTidY = M->getFunction("llvm.ptx.read.tid.y")) {
-    FuncPTXReadTidY->eraseFromParent();
-  }
-}
-
-void PTXGenerator::finishGeneration(Function *F) {
-  // Define data used by the GPURuntime library.
-  AllocaInst *PtrCUContext =
-      Builder.CreateAlloca(getGPUContextPtrType(), 0, "phcontext");
-  AllocaInst *PtrCUDevice =
-      Builder.CreateAlloca(getGPUDevicePtrType(), 0, "phdevice");
-  AllocaInst *PtrCUModule =
-      Builder.CreateAlloca(getGPUModulePtrType(), 0, "phmodule");
-  AllocaInst *PtrCUKernel =
-      Builder.CreateAlloca(getGPUFunctionPtrType(), 0, "phkernel");
-  AllocaInst *PtrCUStartEvent =
-      Builder.CreateAlloca(getGPUEventPtrType(), 0, "pstart_timer");
-  AllocaInst *PtrCUStopEvent =
-      Builder.CreateAlloca(getGPUEventPtrType(), 0, "pstop_timer");
-  AllocaInst *PtrDevData =
-      Builder.CreateAlloca(getPtrGPUDevicePtrType(), 0, "pdevice_data");
-  AllocaInst *PtrHostData =
-      Builder.CreateAlloca(getI8PtrType(), 0, "phost_data");
-  Type *FloatTy = llvm::Type::getFloatTy(getModule()->getContext());
-  AllocaInst *PtrElapsedTimes = Builder.CreateAlloca(FloatTy, 0, "ptimer");
-
-  // Initialize the GPU device.
-  createCallInitDevice(PtrCUContext, PtrCUDevice);
-
-  // Create the GPU kernel module and entry function.
-  Value *PTXString = createPTXKernelFunction(F);
-  Value *PTXEntry = getPTXKernelEntryName(F);
-  createCallGetPTXModule(PTXString, PtrCUModule);
-  LoadInst *CUModule = Builder.CreateLoad(PtrCUModule, "cumodule");
-  createCallGetPTXKernelEntry(PTXEntry, CUModule, PtrCUKernel);
-
-  // Allocate device memory and its corresponding host memory.
-  createCallAllocateMemoryForHostAndDevice(PtrHostData, PtrDevData,
-                                           getOutputArraySizeInBytes());
-
-  // Get the pointer to the device memory and set the GPU execution parameters.
-  LoadInst *DData = Builder.CreateLoad(PtrDevData, "device_data");
-  LoadInst *CUKernel = Builder.CreateLoad(PtrCUKernel, "cukernel");
-  createCallSetKernelParameters(CUKernel, getCUDABlockWidth(),
-                                getCUDABlockHeight(), DData);
-
-  // Create the start and end timer and record the start time.
-  createCallStartTimerByCudaEvent(PtrCUStartEvent, PtrCUStopEvent);
-
-  // Launch the GPU kernel.
-  createCallLaunchKernel(CUKernel, getCUDAGridWidth(), getCUDAGridHeight());
-
-  // Copy the results back from the GPU to the host.
-  LoadInst *HData = Builder.CreateLoad(PtrHostData, "host_data");
-  createCallCopyFromDeviceToHost(HData, DData, getOutputArraySizeInBytes());
-
-  // Record the end time.
-  LoadInst *CUStartEvent = Builder.CreateLoad(PtrCUStartEvent, "start_timer");
-  LoadInst *CUStopEvent = Builder.CreateLoad(PtrCUStopEvent, "stop_timer");
-  createCallStopTimerByCudaEvent(CUStartEvent, CUStopEvent, PtrElapsedTimes);
-
-  // Cleanup all the resources used.
-  LoadInst *CUContext = Builder.CreateLoad(PtrCUContext, "cucontext");
-  createCallCleanupGPGPUResources(HData, DData, CUModule, CUContext, CUKernel);
-
-  // Erase the ptx kernel and device subfunctions and ptx intrinsics from
-  // current module.
-  eraseUnusedFunctions(F);
-}
-#endif /* GPU_CODEGEN */

Modified: polly/trunk/lib/Makefile
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Makefile?rev=237450&r1=237449&r2=237450&view=diff
==============================================================================
--- polly/trunk/lib/Makefile (original)
+++ polly/trunk/lib/Makefile Fri May 15 10:41:14 2015
@@ -18,7 +18,7 @@ include $(LEVEL)/Makefile.config
 
 # Enable optional source files
 ifeq ($(GPU_CODEGEN), yes)
-GPGPU_CODEGEN_FILES= CodeGen/PTXGenerator.cpp
+GPGPU_CODEGEN_FILES=""
 endif
 
 ISL_CODEGEN_FILES= CodeGen/IslAst.cpp \





More information about the llvm-commits mailing list