[polly] r211573 - Remove use of llvm.codegen intrinsic for GPGPU codegen
Yabin Hu
yabin.hwu at gmail.com
Tue Jun 24 01:11:36 PDT 2014
Author: yabin
Date: Tue Jun 24 03:11:36 2014
New Revision: 211573
URL: http://llvm.org/viewvc/llvm-project?rev=211573&view=rev
Log:
Remove use of llvm.codegen intrinsic for GPGPU codegen
We used the llvm.codegen intrinsic to generate code for embedded LLVM-IR
strings. The reason we introduced such an intrinsic is that the previous
clang/opt tools were NOT linked with the various LLVM targets and their
AsmParsers and AsmPrinters. Since clang/opt are now linked with all the
needed libraries, we no longer need the llvm.codegen intrinsic.
Added:
polly/trunk/test/Cloog/CodeGen/GPGPU/1d_parallel.ll
polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%1---%5.jscop
polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%1---%5.jscop.transformed+gpu
Removed:
polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch
Modified:
polly/trunk/include/polly/CodeGen/PTXGenerator.h
polly/trunk/lib/CodeGen/PTXGenerator.cpp
Modified: polly/trunk/include/polly/CodeGen/PTXGenerator.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/PTXGenerator.h?rev=211573&r1=211572&r2=211573&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/PTXGenerator.h (original)
+++ polly/trunk/include/polly/CodeGen/PTXGenerator.h Tue Jun 24 03:11:36 2014
@@ -159,13 +159,6 @@ private:
/// will be copied from host to device.
Function *createSubfunctionDefinition(int NumArgs);
- /// @brief Extract all the ptx related subfunctions into a new module.
- ///
- /// @param M Current module.
- /// @return The generated module containing only gpu related
- /// subfunctions.
- Module *extractPTXFunctionsFromModule(const Module *M);
-
/// @brief Get the Value of CUDA block width.
Value *getCUDABlockWidth();
Modified: polly/trunk/lib/CodeGen/PTXGenerator.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PTXGenerator.cpp?rev=211573&r1=211572&r2=211573&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PTXGenerator.cpp (original)
+++ polly/trunk/lib/CodeGen/PTXGenerator.cpp Tue Jun 24 03:11:36 2014
@@ -22,10 +22,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -507,44 +509,98 @@ Value *PTXGenerator::getOutputArraySizeI
return ConstantInt::get(getInt64Type(), OutputBytes);
}
+static Module *extractPTXFunctionsFromModule(const Module *M,
+ const StringRef &Triple) {
+ llvm::ValueToValueMapTy VMap;
+ Module *New = new Module("TempGPUModule", M->getContext());
+ New->setTargetTriple(Triple::normalize(Triple));
+
+ // Loop over the functions in the module, making external functions as before
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ if (!I->isDeclaration() &&
+ (I->getCallingConv() == CallingConv::PTX_Device ||
+ I->getCallingConv() == CallingConv::PTX_Kernel)) {
+ Function *NF =
+ Function::Create(cast<FunctionType>(I->getType()->getElementType()),
+ I->getLinkage(), I->getName(), New);
+ NF->copyAttributesFrom(I);
+ VMap[I] = NF;
+
+ Function::arg_iterator DestI = NF->arg_begin();
+ for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
+ ++J) {
+ DestI->setName(J->getName());
+ VMap[J] = DestI++;
+ }
+ SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NF, I, VMap, /*ModuleLevelChanges=*/true, Returns);
+ }
+ }
+
+ return New;
+}
+
+static bool createASMAsString(Module *New, const StringRef &Triple,
+ const StringRef &MCPU, const StringRef &Features,
+ std::string &ASM) {
+ llvm::Triple TheTriple(Triple::normalize(Triple));
+ std::string ErrMsg;
+ const Target *TheTarget =
+ TargetRegistry::lookupTarget(TheTriple.getTriple(), ErrMsg);
+ if (!TheTarget) {
+ errs() << ErrMsg << "\n";
+ return false;
+ }
+
+ TargetOptions Options;
+ std::unique_ptr<TargetMachine> target(TheTarget->createTargetMachine(
+ TheTriple.getTriple(), MCPU, Features, Options));
+ assert(target.get() && "Could not allocate target machine!");
+ TargetMachine &Target = *target.get();
+
+ // Build up all of the passes that we want to do to the module.
+ PassManager PM;
+
+ TargetLibraryInfo *TLI = new TargetLibraryInfo(TheTriple);
+ PM.add(TLI);
+
+ PM.add(new DataLayoutPass(*Target.getDataLayout()));
+ Target.addAnalysisPasses(PM);
+
+ {
+ raw_string_ostream NameROS(ASM);
+ formatted_raw_ostream FOS(NameROS);
+
+ // Ask the target to add backend passes as necessary.
+ int UseVerifier = true;
+ if (Target.addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
+ UseVerifier)) {
+ errs() << "The target does not support generation of this file type!\n";
+ return false;
+ }
+
+ PM.run(*New);
+ FOS.flush();
+ }
+
+ return true;
+}
+
Value *PTXGenerator::createPTXKernelFunction(Function *SubFunction) {
Module *M = getModule();
+ Module *GPUModule = extractPTXFunctionsFromModule(M, GPUTriple);
std::string LLVMKernelStr;
- raw_string_ostream NameROS(LLVMKernelStr);
- formatted_raw_ostream FOS(NameROS);
- FOS << "target triple = \"" << GPUTriple << "\"\n";
- SubFunction->print(FOS);
-
- // Insert ptx intrinsics into the kernel string.
- for (Module::iterator I = M->begin(), E = M->end(); I != E;) {
- Function *F = I++;
- // Function must be a prototype and unused.
- if (F->isDeclaration() && F->isIntrinsic()) {
- switch (F->getIntrinsicID()) {
- case Intrinsic::ptx_read_nctaid_x:
- case Intrinsic::ptx_read_nctaid_y:
- case Intrinsic::ptx_read_ctaid_x:
- case Intrinsic::ptx_read_ctaid_y:
- case Intrinsic::ptx_read_ntid_x:
- case Intrinsic::ptx_read_ntid_y:
- case Intrinsic::ptx_read_tid_x:
- case Intrinsic::ptx_read_tid_y:
- F->print(FOS);
- break;
- default:
- break;
- }
- }
+ if (!createASMAsString(GPUModule, GPUTriple, "sm_20" /*MCPU*/,
+ "" /*Features*/, LLVMKernelStr)) {
+ errs() << "Generate ptx string failed!\n";
+ return NULL;
}
Value *LLVMKernel =
Builder.CreateGlobalStringPtr(LLVMKernelStr, "llvm_kernel");
- Value *MCPU = Builder.CreateGlobalStringPtr("sm_10", "mcpu");
- Value *Features = Builder.CreateGlobalStringPtr("", "cpu_features");
-
- Function *GetDeviceKernel = Intrinsic::getDeclaration(M, Intrinsic::codegen);
- return Builder.CreateCall3(GetDeviceKernel, LLVMKernel, MCPU, Features);
+ delete GPUModule;
+ return LLVMKernel;
}
Value *PTXGenerator::getPTXKernelEntryName(Function *SubFunction) {
Added: polly/trunk/test/Cloog/CodeGen/GPGPU/1d_parallel.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/GPGPU/1d_parallel.ll?rev=211573&view=auto
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/GPGPU/1d_parallel.ll (added)
+++ polly/trunk/test/Cloog/CodeGen/GPGPU/1d_parallel.ll Tue Jun 24 03:11:36 2014
@@ -0,0 +1,72 @@
+; REQUIRES: nvptx-registered-target
+; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed+gpu -enable-polly-gpgpu -polly-gpgpu-triple=nvptx64-unknown-unknown -polly-codegen < %s -S | FileCheck %s
+
+;int A[1024];
+
+;int gpu() {
+; int i;
+;
+; for(i = 0; i < 1024; i++)
+; A[i] = i*128 + 508;
+;
+; return 0;
+;}
+;
+;int main() {
+; int b = gpu();
+; return 0;
+;}
+
+; ModuleID = '1d_parallel.s'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define i32 @gpu() #0 {
+ br label %.split
+
+.split: ; preds = %0
+ br label %1
+
+; <label>:1 ; preds = %.split, %1
+ %indvar = phi i64 [ 0, %.split ], [ %indvar.next, %1 ]
+ %2 = mul i64 %indvar, 128
+ %3 = add i64 %2, 508
+ %4 = trunc i64 %3 to i32
+ %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar
+ store i32 %4, i32* %scevgep, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp ne i64 %indvar.next, 1024
+ br i1 %exitcond, label %1, label %5
+
+; <label>:5 ; preds = %1
+ ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+ br label %.split
+
+.split: ; preds = %0
+ %1 = tail call i32 @gpu()
+ ret i32 0
+}
+
+; CHECK: call void @polly_initDevice
+; CHECK: call void @polly_getPTXModule
+; CHECK: call void @polly_getPTXKernelEntry
+; CHECK: call void @polly_allocateMemoryForHostAndDevice
+; CHECK: call void @polly_setKernelParameters
+; CHECK: call void @polly_startTimerByCudaEvent
+; CHECK: call void @polly_launchKernel
+; CHECK: call void @polly_copyFromDeviceToHost
+; CHECK: call void @polly_stopTimerByCudaEvent
+; CHECK: call void @polly_cleanupGPGPUResources
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5.0 "}
Added: polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%1---%5.jscop
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%251---%255.jscop?rev=211573&view=auto
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%1---%5.jscop (added)
+++ polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%1---%5.jscop Tue Jun 24 03:11:36 2014
@@ -0,0 +1,17 @@
+{
+ "context" : "{ : }",
+ "name" : "%1 => %5",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "{ Stmt_1[i0] -> MemRef_A[i0] }"
+ }
+ ],
+ "domain" : "{ Stmt_1[i0] : i0 >= 0 and i0 <= 1023 }",
+ "name" : "Stmt_1",
+ "schedule" : "{ Stmt_1[i0] -> scattering[0, i0, 0] }"
+ }
+ ]
+}
Added: polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%1---%5.jscop.transformed+gpu
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%251---%255.jscop.transformed%2Bgpu?rev=211573&view=auto
==============================================================================
--- polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%1---%5.jscop.transformed+gpu (added)
+++ polly/trunk/test/Cloog/CodeGen/GPGPU/gpu___%1---%5.jscop.transformed+gpu Tue Jun 24 03:11:36 2014
@@ -0,0 +1,17 @@
+{
+ "context" : "{ : }",
+ "name" : "%1 => %5",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "{ Stmt_1[i0] -> MemRef_A[i0] }"
+ }
+ ],
+ "domain" : "{ Stmt_1[i0] : i0 >= 0 and i0 <= 1023 }",
+ "name" : "Stmt_1",
+ "schedule" : "{ Stmt_1[i0] -> scattering[0, o0, o1, o2, o3] : o0 >= 0 and o0 <= 1 and o1 >= 0 and o1 <= 1 and o2 >= 0 and o2 <= 15 and o3 >= 0 and o3 <= 15 and i0 = 512o0 + 256o1 + 16o2 + o3 }"
+ }
+ ]
+}
Removed: polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch?rev=211572&view=auto
==============================================================================
--- polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch (original)
+++ polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch (removed)
@@ -1,489 +0,0 @@
-From 7e36390f24f6ceaea7bc2ba4adcd55d06cf73439 Mon Sep 17 00:00:00 2001
-From: Yabin Hu <yabin.hwu at gmail.com>
-Date: Thu, 29 Nov 2012 16:08:29 +0800
-Subject: [PATCH] Add llvm.codegen intrinsic.
-
-The llvm.codegen intrinsic generates code for embedded LLVM-IR
-strings. Each call to the intrinsic is replaced by a pointer to
-the newly generated target code. The code generation target can be
-different to the one of the parent module.
----
- docs/LangRef.html | 36 +++
- include/llvm/CodeGen/Passes.h | 3 +
- include/llvm/InitializePasses.h | 1 +
- include/llvm/Intrinsics.td | 4 +
- lib/CodeGen/CMakeLists.txt | 1 +
- lib/CodeGen/CodeGen.cpp | 1 +
- lib/CodeGen/CodeGenIntrinsic.cpp | 227 ++++++++++++++++++++
- lib/CodeGen/Passes.cpp | 3 +
- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +
- lib/Target/LLVMBuild.txt | 2 +-
- lib/VMCore/Verifier.cpp | 10 +
- .../CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll | 28 +++
- test/CodeGen/X86/EmbeddedCG/lit.local.cfg | 5 +
- 13 files changed, 322 insertions(+), 1 deletions(-)
- create mode 100644 lib/CodeGen/CodeGenIntrinsic.cpp
- create mode 100644 test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
- create mode 100644 test/CodeGen/X86/EmbeddedCG/lit.local.cfg
-
-diff --git a/docs/LangRef.html b/docs/LangRef.html
-index cfc1c7d..eae069e 100644
---- a/docs/LangRef.html
-+++ b/docs/LangRef.html
-@@ -243,6 +243,7 @@
- <li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li>
- <li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li>
- <li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
-+ <li><a href="#int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_libc">Standard C Library Intrinsics</a>
-@@ -7249,6 +7250,41 @@ LLVM</a>.</p>
-
- </div>
-
-+<!-- _______________________________________________________________________ -->
-+<h4>
-+ <a name="int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a>
-+</h4>
-+
-+<div>
-+
-+<h5>Syntax:</h5>
-+<pre>
-+ declare i8* @llvm.codegen(i8* <IRString>, i8* <MCPU>, i8* <
-+ Features>)
-+</pre>
-+
-+<h5>Overview:</h5>
-+<p>The '<tt>llvm.codegen</tt>' intrinsic uses the LLVM back ends to generate
-+ code for embedded LLVM-IR strings. The code generation target can be
-+ different to the one of the parent module.</p>
-+
-+<h5>Arguments:</h5>
-+<p><tt>IRString</tt> is a string containing LLVM-IR.</p>
-+<p><tt>MCPU</tt> is the name of the target CPU.</p>
-+<p><tt>Features</tt> is the string representation of the additional target
-+ features.</p>
-+
-+<h5>Semantics:</h5>
-+<p>The '<tt>llvm.codegen</tt>' intrinsic transforms a string containing LLVM IR
-+ to target assembly code. Calls to the intrinsic are replaced by a pointer to
-+ the newly generated target code. In case LLVM can not generate code (e.g. the
-+ target is not available), the call to the intrinsic is replaced by a i8 NULL
-+ pointer.Users of this intrinsic should make sure the target triple is
-+ properly set in the <IRString>. Inputs to both <MCPU> and
-+ <Features> parameters can be null pointers.</p>
-+
-+</div>
-+
- </div>
-
- <!-- ======================================================================= -->
-diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
-index 44c9676..57b3aa2 100644
---- a/include/llvm/CodeGen/Passes.h
-+++ b/include/llvm/CodeGen/Passes.h
-@@ -432,6 +432,9 @@ namespace llvm {
- /// branch folding).
- extern char &GCMachineCodeAnalysisID;
-
-+ /// CodeGenIntrinsic Pass - Create target code for embedded LLVM-IR strings.
-+ FunctionPass *createCodeGenIntrinsicPass();
-+
- /// Deleter Pass - Releases GC metadata.
- ///
- FunctionPass *createGCInfoDeleter();
-diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
-index e06b892..fe8655e 100644
---- a/include/llvm/InitializePasses.h
-+++ b/include/llvm/InitializePasses.h
-@@ -93,6 +93,7 @@ void initializeCorrelatedValuePropagationPass(PassRegistry&);
- void initializeDAEPass(PassRegistry&);
- void initializeDAHPass(PassRegistry&);
- void initializeDCEPass(PassRegistry&);
-+void initializeCodeGenIntrinsicPass(PassRegistry&);
- void initializeDSEPass(PassRegistry&);
- void initializeDeadInstEliminationPass(PassRegistry&);
- void initializeDeadMachineInstructionElimPass(PassRegistry&);
-diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
-index d3a548c..a60d2bb 100644
---- a/include/llvm/Intrinsics.td
-+++ b/include/llvm/Intrinsics.td
-@@ -238,6 +238,10 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
- // guard to the correct place on the stack frame.
- def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
-
-+//===----------------- Code Generation for Embedded LLVM-IR ---------------===//
-+def int_codegen : Intrinsic<[llvm_ptr_ty],
-+ [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty]>;
-+
- //===------------------- Standard C Library Intrinsics --------------------===//
- //
-
-diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
-index 7a20ff6..8e1ab9a 100644
---- a/lib/CodeGen/CMakeLists.txt
-+++ b/lib/CodeGen/CMakeLists.txt
-@@ -6,6 +6,7 @@ add_llvm_library(LLVMCodeGen
- CalcSpillWeights.cpp
- CallingConvLower.cpp
- CodeGen.cpp
-+ CodeGenIntrinsic.cpp
- CodePlacementOpt.cpp
- CriticalAntiDepBreaker.cpp
- DeadMachineInstructionElim.cpp
-diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
-index a53f6f8..702ee18 100644
---- a/lib/CodeGen/CodeGen.cpp
-+++ b/lib/CodeGen/CodeGen.cpp
-@@ -21,6 +21,7 @@ using namespace llvm;
- void llvm::initializeCodeGen(PassRegistry &Registry) {
- initializeBranchFolderPassPass(Registry);
- initializeCalculateSpillWeightsPass(Registry);
-+ initializeCodeGenIntrinsicPass(Registry);
- initializeCodePlacementOptPass(Registry);
- initializeDeadMachineInstructionElimPass(Registry);
- initializeEarlyIfConverterPass(Registry);
-diff --git a/lib/CodeGen/CodeGenIntrinsic.cpp b/lib/CodeGen/CodeGenIntrinsic.cpp
-new file mode 100644
-index 0000000..cf8aa54
---- /dev/null
-+++ b/lib/CodeGen/CodeGenIntrinsic.cpp
-@@ -0,0 +1,227 @@
-+//===-- CodeGenIntrinsic.cpp - CodeGen Intrinsic --------------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file implements the llvm.codegen intrinsic.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "llvm/CodeGen/Passes.h"
-+#include "llvm/CallingConv.h"
-+#include "llvm/IRBuilder.h"
-+#include "llvm/IntrinsicInst.h"
-+#include "llvm/LLVMContext.h"
-+#include "llvm/Module.h"
-+#include "llvm/PassManager.h"
-+#include "llvm/AsmParser/Parser.h"
-+#include "llvm/Target/TargetMachine.h"
-+#include "llvm/Target/TargetRegisterInfo.h"
-+#include "llvm/Support/Debug.h"
-+#include "llvm/Support/ErrorHandling.h"
-+#include "llvm/Support/FormattedStream.h"
-+#include "llvm/Support/Host.h"
-+#include "llvm/Support/raw_ostream.h"
-+#include "llvm/Support/SourceMgr.h"
-+#include "llvm/Support/TargetRegistry.h"
-+#include "llvm/ADT/Triple.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+ /// ASMGenerator generates target-specific assembly code from LLVM IR.
-+ class ASMGenerator {
-+ public:
-+ ASMGenerator() {}
-+
-+ /// generate - Generates a target code string from a LLVM IR Value.
-+ bool generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr,
-+ std::string &ASM);
-+
-+ private:
-+ bool getStringFromConstantExpr(Value *ConstData, std::string &Out) const;
-+ };
-+
-+ /// CodeGenIntrinsic - This pass replaces each call to the llvm.codegen
-+ /// intrinsic with a string generated by ASMGenerator.
-+ class CodeGenIntrinsic : public FunctionPass {
-+ public:
-+ static char ID;
-+
-+ CodeGenIntrinsic();
-+ const char *getPassName() const;
-+ virtual bool runOnFunction(Function &F);
-+ };
-+}
-+
-+// -----------------------------------------------------------------------------
-+static bool getTargetMachineFromModule(Module *M, const StringRef &TripleStr,
-+ const StringRef &MCPU,
-+ const StringRef &Features,
-+ TargetMachine *&TM) {
-+ std::string ErrMsg;
-+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
-+ if (!TheTarget) {
-+ errs() << ErrMsg << "\n";
-+ return false;
-+ }
-+
-+ TargetOptions Options;
-+ TM = TheTarget->createTargetMachine(TripleStr, MCPU, Features, Options);
-+ assert(TM && "Could not allocate target machine!");
-+ return true;
-+}
-+
-+static bool createASMAsString(Module *New, const StringRef &Triple,
-+ const StringRef &MCPU, const StringRef &Features,
-+ std::string &ASM) {
-+ TargetMachine *Target;
-+ if (!getTargetMachineFromModule(New, Triple, MCPU, Features, Target)) {
-+ return false;
-+ }
-+
-+ // Build up all of the passes that we want to do to the module.
-+ PassManager PM;
-+
-+ // Get the data layout of the new module. If it is empty, return false.
-+ const std::string &ModuleDataLayout = New->getDataLayout();
-+ if (ModuleDataLayout.empty())
-+ return false;
-+
-+ {
-+ raw_string_ostream NameROS(ASM);
-+ formatted_raw_ostream FOS(NameROS);
-+
-+ // Ask the target to add backend passes as necessary.
-+ int UseVerifier = true;
-+ if (Target->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
-+ UseVerifier)) {
-+ errs() << "CodeGen Intrinsic: target does not support generation of this "
-+ << "file type!\n";
-+
-+ return false;
-+ }
-+
-+ PM.run(*New);
-+ FOS.flush();
-+ }
-+
-+ delete Target;
-+ return true;
-+}
-+
-+bool ASMGenerator::getStringFromConstantExpr(Value *ConstData,
-+ std::string &Out) const {
-+ bool Result = false;
-+ if (ConstantExpr *U = dyn_cast<ConstantExpr>(ConstData)) {
-+ Value *R = U->getOperand(0);
-+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(R)) {
-+ Constant *C = GV->getInitializer();
-+ if (ConstantDataArray *CA = dyn_cast<ConstantDataArray>(C)) {
-+ Out = CA->getAsString();
-+ Result = true;
-+ }
-+ }
-+ }
-+ return Result;
-+}
-+
-+bool ASMGenerator::generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr,
-+ std::string &ASM) {
-+ std::string Kernel;
-+ if (!getStringFromConstantExpr(IRStr, Kernel))
-+ return false;
-+
-+ std::string MCPU;
-+ if (!getStringFromConstantExpr(MCPUStr, MCPU))
-+ MCPU = "";
-+
-+ std::string Features;
-+ if (!getStringFromConstantExpr(FeaturesStr, Features))
-+ Features = "";
-+
-+ SMDiagnostic ErrorMessage;
-+ LLVMContext Context;
-+ std::auto_ptr<Module> TempModule(
-+ ParseAssemblyString(Kernel.c_str(), 0, ErrorMessage, Context));
-+
-+ Triple TheTriple(TempModule->getTargetTriple());
-+ const std::string TripleStr = TheTriple.getTriple();
-+ if(TripleStr.empty()) {
-+ errs() << "error: Target triple isn't set correctly for the new module.\n";
-+ return false;
-+ }
-+
-+ return createASMAsString(TempModule.get(), TripleStr.data(), MCPU.data(),
-+ Features.data(), ASM);
-+}
-+
-+// -----------------------------------------------------------------------------
-+INITIALIZE_PASS(CodeGenIntrinsic, "codegen-intrinsic", "CodeGen Intrinsic",
-+ false, false)
-+
-+FunctionPass *llvm::createCodeGenIntrinsicPass() {
-+ return new CodeGenIntrinsic();
-+}
-+
-+char CodeGenIntrinsic::ID = 0;
-+
-+CodeGenIntrinsic::CodeGenIntrinsic()
-+ : FunctionPass(ID) {
-+}
-+
-+const char *CodeGenIntrinsic::getPassName() const {
-+ return "Lowering CodeGen Intrinsic.";
-+}
-+
-+bool CodeGenIntrinsic::runOnFunction(Function &F) {
-+ bool MadeChange = false;
-+ Module *M = F.getParent();
-+ if (Function *CG = M->getFunction("llvm.codegen")) {
-+ for (Function::use_iterator I = CG->use_begin(), E = CG->use_end();
-+ I != E; ++I) {
-+ if (CallInst *CI = dyn_cast<CallInst>(*I)) {
-+ if (&F != CI->getParent()->getParent())
-+ continue;
-+
-+ std::string ASM;
-+ ASMGenerator *Generator = new ASMGenerator();
-+ IRBuilder<> Builder(CI->getParent(), CI);
-+ Value *St;
-+ if (!Generator->generate(CI->getArgOperand(0), CI->getArgOperand(1),
-+ CI->getArgOperand(2), ASM)) {
-+ Type *Ty= CG->getReturnType();
-+ St = Constant::getNullValue(Ty);
-+ } else {
-+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims?
-+ // Seems that, short of multithreaded LLVM, it should be safe;
-+ // all that is necessary is that a simple Module::iterator loop
-+ // not be invalidated. Appending to the GlobalVariable list is
-+ // safe in that sense.
-+ //
-+ // All the output passes emit globals last. The ExecutionEngine
-+ // explicitly supports adding globals to the module after
-+ // initialization.
-+ //
-+ // Still, if it isn't deemed acceptable, then this
-+ // transformation needs to be a ModulePass (which means it
-+ // cannot be in the 'llc' pipeline (which uses a
-+ // FunctionPassManager (which segfaults (not asserts) if
-+ // provided a ModulePass))).
-+ St = Builder.CreateGlobalStringPtr(ASM, "ASM");
-+ }
-+ CI->replaceAllUsesWith(St);
-+ CI->eraseFromParent();
-+ // We should erase the unused globals from current module. But we
-+ // can't do this within a FunctionPass.
-+ MadeChange = true;
-+ }
-+ }
-+ }
-+
-+ return MadeChange;
-+}
-diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
-index 526d994..1de0c63 100644
---- a/lib/CodeGen/Passes.cpp
-+++ b/lib/CodeGen/Passes.cpp
-@@ -369,6 +369,9 @@ void TargetPassConfig::addIRPasses() {
-
- addPass(createGCLoweringPass());
-
-+ // Generate target code for embedded LLVM-IR strings.
-+ addPass(createCodeGenIntrinsicPass());
-+
- // Make sure that no unreachable blocks are instruction selected.
- addPass(createUnreachableBlockEliminationPass());
- }
-diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-index 56e774c..97006c0 100644
---- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-@@ -5169,6 +5169,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
- case Intrinsic::donothing:
- // ignore
- return 0;
-+ case Intrinsic::codegen:
-+ llvm_unreachable("failed to lower codegen intrinsic!");
- }
- }
-
-diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
-index eb6c779..a54f57c 100644
---- a/lib/Target/LLVMBuild.txt
-+++ b/lib/Target/LLVMBuild.txt
-@@ -45,7 +45,7 @@ parent = Libraries
- type = Library
- name = Target
- parent = Libraries
--required_libraries = Core MC Support
-+required_libraries = Core MC Support AsmParser
-
- ; This is a special group whose required libraries are extended (by llvm-build)
- ; with every built target, which makes it easy for tools to include every
-diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
-index 3782957..896772a 100644
---- a/lib/VMCore/Verifier.cpp
-+++ b/lib/VMCore/Verifier.cpp
-@@ -1952,6 +1952,16 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
- "llvm.invariant.end parameter #2 must be a constant integer", &CI);
- break;
-+ case Intrinsic::codegen:
-+ Assert1(isa<ConstantExpr>(CI.getArgOperand(0)),
-+ "llvm.codegen parameter #1 must be a constant expression", &CI);
-+ Assert1(isa<ConstantExpr>(CI.getArgOperand(1)) ||
-+ isa<ConstantPointerNull>(CI.getArgOperand(1)),
-+ "llvm.codegen parameter #2 must be a constant expression", &CI);
-+ Assert1(isa<ConstantExpr>(CI.getArgOperand(2)) ||
-+ isa<ConstantPointerNull>(CI.getArgOperand(2)),
-+ "llvm.codegen parameter #3 must be a constant expression", &CI);
-+ break;
- }
- }
-
-diff --git a/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
-new file mode 100644
-index 0000000..73d34e1
---- /dev/null
-+++ b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
-@@ -0,0 +1,28 @@
-+; RUN: llc < %s -march=x86 | FileCheck %s
-+
-+; ModuleID = 'embedded-codegen-ptx.ll'
-+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-+target triple = "i386-pc-linux-gnu"
-+
-+ at llvm_kernel = private unnamed_addr constant [1940 x i8] c"target triple = \22nvptx-unknown-unknown\22\0A\0Adefine internal ptx_kernel void @gpu_codegen.ptx_subfn(i8* %ptx.Array) {\0Aptx.setup:\0A %0 = bitcast i8* %ptx.Array to [128 x [128 x i32]]*\0A %1 = call i32 @llvm.ptx.read.nctaid.x()\0A %2 = zext i32 %1 to i64\0A %3 = call i32 @llvm.ptx.read.nctaid.y()\0A %4 = zext i32 %3 to i64\0A %5 = call i32 @llvm.ptx.read.ntid.x()\0A %6 = zext i32 %5 to i64\0A %7 = call i32 @llvm.ptx.read.ntid.y()\0A %8 = zext i32 %7 to i64\0A %9 = call i32 @llvm.ptx.read.ctaid.x()\0A %10 = zext i32 %9 to i64\0A %11 = call i32 @llvm.ptx.read.ctaid.y()\0A %12 = zext i32 %11 to i64\0A %13 = call i32 @llvm.ptx.read.tid.x()\0A %14 = zext i32 %13 to i64\0A %15 = call i32 @llvm.ptx.read.tid.y()\0A %16 = zext i32 %15 to i64\0A br label %ptx.loop_body\0A\0Aptx.exit: ; preds = %polly.stmt.for.body3\0A ret void\0A\0Aptx.loop_body: !
; preds = %ptx.setup\0A %p_gpu_index_i = mul i64 %12, %2\0A %17 = add i64 %p_gpu_index_i, %10\0A %p_gpu_index_j = mul i64 %16, %6\0A %18 = add i64 %p_gpu_index_j, %14\0A br label %polly.stmt.for.body3\0A\0Apolly.stmt.for.body3: ; preds = %ptx.loop_body\0A %19 = trunc i64 %17 to i32\0A %p_mul = shl nsw i32 %19, 7\0A %20 = trunc i64 %18 to i32\0A %p_add = add nsw i32 %p_mul, %20\0A %21 = trunc i64 %17 to i32\0A %22 = trunc i64 %18 to i32\0A %p_arrayidx4 = getelementptr inbounds [128 x [128 x i32]]* %0, i32 0, i32 %21, i32 %22\0A store i32 %p_add, i32* %p_arrayidx4\0A br label %ptx.exit\0A}\0A\0Adeclare i32 @llvm.ptx.read.nctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.nctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.y(!
) nounwin
d readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.y() nounwind readnone\0A\00"
-+
-+ at .str = private unnamed_addr constant [3 x i8] c"%s\00", align 1
-+
-+define i32 @gpu_codegen() nounwind {
-+entry:
-+ %0 = call i8* @llvm.codegen(i8* getelementptr inbounds ([1940 x i8]* @llvm_kernel, i32 0, i32 0), i8* null, i8* null)
-+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0)
-+ ret i32 0
-+}
-+
-+define i32 @main() nounwind {
-+entry:
-+ %call = call i32 @gpu_codegen()
-+ ret i32 0
-+}
-+
-+declare i8* @llvm.codegen(i8*, i8*, i8*) nounwind
-+
-+declare i32 @printf(i8*, ...) nounwind
-+
-+; CHECK: gpu_codegen_2E_ptx_subfn
-diff --git a/test/CodeGen/X86/EmbeddedCG/lit.local.cfg b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg
-new file mode 100644
-index 0000000..7180c84
---- /dev/null
-+++ b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg
-@@ -0,0 +1,5 @@
-+config.suffixes = ['.ll', '.c', '.cpp']
-+
-+targets = set(config.root.targets_to_build.split())
-+if not 'NVPTX' in targets:
-+ config.unsupported = True
---
-1.7.6.5
-
More information about the llvm-commits
mailing list