[llvm-commits] [polly] r157689 - /polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch

Tobias Grosser grosser at fim.uni-passau.de
Wed May 30 06:53:57 PDT 2012


Author: grosser
Date: Wed May 30 08:53:57 2012
New Revision: 157689

URL: http://llvm.org/viewvc/llvm-project?rev=157689&view=rev
Log:
Add llvm.codegen intrinsic patch file to polly/utils.

The "llvm.codegen" intrinsic patch is a patch to LLVM that adds an intrinsic
for generating code from embedded LLVM-IR strings. In Polly, we use it
to generate PTX assembly text for GPGPU code generation.

Added:
    polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch

Added: polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch?rev=157689&view=auto
==============================================================================
--- polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch (added)
+++ polly/trunk/utils/0001-Add-llvm.codegen-intrinsic.patch Wed May 30 08:53:57 2012
@@ -0,0 +1,492 @@
+From 28e525d58b236efa5db562c2931c996e26d6996b Mon Sep 17 00:00:00 2001
+From: Yabin Hu <yabin.hwu at gmail.com>
+Date: Wed, 23 May 2012 09:19:36 +0800
+Subject: [PATCH] Add llvm.codegen intrinsic.
+
+The llvm.codegen intrinsic generates code for embedded LLVM-IR
+strings. Each call to the intrinsic is replaced by a pointer to
+the newly generated target code. The code generation target can be
+different to the one of the parent module.
+---
+ docs/LangRef.html                                  |   36 +++
+ include/llvm/CodeGen/Passes.h                      |    3 +
+ include/llvm/InitializePasses.h                    |    1 +
+ include/llvm/Intrinsics.td                         |    4 +
+ lib/CodeGen/CMakeLists.txt                         |    1 +
+ lib/CodeGen/CodeGen.cpp                            |    1 +
+ lib/CodeGen/CodeGenIntrinsic.cpp                   |  229 ++++++++++++++++++++
+ lib/CodeGen/Passes.cpp                             |    3 +
+ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   |    3 +
+ lib/Target/LLVMBuild.txt                           |    2 +-
+ lib/VMCore/Verifier.cpp                            |   10 +
+ .../CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll |   28 +++
+ test/CodeGen/X86/EmbeddedCG/lit.local.cfg          |    5 +
+ 13 files changed, 325 insertions(+), 1 deletions(-)
+ create mode 100644 lib/CodeGen/CodeGenIntrinsic.cpp
+ create mode 100644 test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
+ create mode 100644 test/CodeGen/X86/EmbeddedCG/lit.local.cfg
+
+diff --git a/docs/LangRef.html b/docs/LangRef.html
+index 8f7a17c..23d73bd 100644
+--- a/docs/LangRef.html
++++ b/docs/LangRef.html
+@@ -242,6 +242,7 @@
+           <li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li>
+           <li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li>
+           <li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
++          <li><a href="#int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a></li>
+         </ol>
+       </li>
+       <li><a href="#int_libc">Standard C Library Intrinsics</a>
+@@ -7015,6 +7016,41 @@ LLVM</a>.</p>
+ 
+ </div>
+ 
++<!-- _______________________________________________________________________ -->
++<h4>
++  <a name="int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a>
++</h4>
++
++<div>
++
++<h5>Syntax:</h5>
++<pre>
++  declare i8* @llvm.codegen(i8* &lt;IRString&gt;, i8* &lt;MCPU&gt;,
++                            i8* &lt;Features&gt;)
++</pre>
++
++<h5>Overview:</h5>
++<p>The '<tt>llvm.codegen</tt>' intrinsic uses the LLVM back ends to generate
++   code for embedded LLVM-IR strings. The code generation target may differ
++   from that of the parent module.</p>
++
++<h5>Arguments:</h5>
++<p><tt>IRString</tt> is a string containing LLVM-IR.</p>
++<p><tt>MCPU</tt> is the name of the target CPU.</p>
++<p><tt>Features</tt> is the string representation of the additional target
++   features.</p>
++
++<h5>Semantics:</h5>
++<p>The '<tt>llvm.codegen</tt>' intrinsic transforms a string containing LLVM IR
++   to target assembly code. Calls to the intrinsic are replaced by a pointer to
++   the newly generated target code. If LLVM cannot generate code (e.g. the
++   target is not available), the call is replaced by a null <tt>i8*</tt>
++   pointer. Users of this intrinsic should make sure the target triple is
++   properly set in <tt>IRString</tt>. Both the <tt>MCPU</tt> and
++   <tt>Features</tt> arguments can be null pointers.</p>
++
++</div>
++
+ </div>
+ 
+ <!-- ======================================================================= -->
+diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
+index e76fe99..ecf3e4b 100644
+--- a/include/llvm/CodeGen/Passes.h
++++ b/include/llvm/CodeGen/Passes.h
+@@ -373,6 +373,9 @@ namespace llvm {
+   /// branch folding).
+   extern char &GCMachineCodeAnalysisID;
+ 
++  /// CodeGenIntrinsic Pass - Create target code for embedded LLVM-IR strings.
++  FunctionPass *createCodeGenIntrinsicPass();
++
+   /// Deleter Pass - Releases GC metadata.
+   ///
+   FunctionPass *createGCInfoDeleter();
+diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
+index df696b1..28b146f 100644
+--- a/include/llvm/InitializePasses.h
++++ b/include/llvm/InitializePasses.h
+@@ -91,6 +91,7 @@ void initializeCorrelatedValuePropagationPass(PassRegistry&);
+ void initializeDAEPass(PassRegistry&);
+ void initializeDAHPass(PassRegistry&);
+ void initializeDCEPass(PassRegistry&);
++void initializeCodeGenIntrinsicPass(PassRegistry&);
+ void initializeDSEPass(PassRegistry&);
+ void initializeDeadInstEliminationPass(PassRegistry&);
+ void initializeDeadMachineInstructionElimPass(PassRegistry&);
+diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
+index 75162bf..4bcec16 100644
+--- a/include/llvm/Intrinsics.td
++++ b/include/llvm/Intrinsics.td
+@@ -226,6 +226,10 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
+ // guard to the correct place on the stack frame.
+ def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
+ 
++//===----------------- Code Generation for Embedded LLVM-IR ---------------===//
++def int_codegen  : Intrinsic<[llvm_ptr_ty],
++                              [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty]>;
++
+ //===------------------- Standard C Library Intrinsics --------------------===//
+ //
+ 
+diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
+index 855fa0c..4451922 100644
+--- a/lib/CodeGen/CMakeLists.txt
++++ b/lib/CodeGen/CMakeLists.txt
+@@ -6,6 +6,7 @@ add_llvm_library(LLVMCodeGen
+   CalcSpillWeights.cpp
+   CallingConvLower.cpp
+   CodeGen.cpp
++  CodeGenIntrinsic.cpp
+   CodePlacementOpt.cpp
+   CriticalAntiDepBreaker.cpp
+   DeadMachineInstructionElim.cpp
+diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
+index a81bb5c..662345a 100644
+--- a/lib/CodeGen/CodeGen.cpp
++++ b/lib/CodeGen/CodeGen.cpp
+@@ -21,6 +21,7 @@ using namespace llvm;
+ void llvm::initializeCodeGen(PassRegistry &Registry) {
+   initializeBranchFolderPassPass(Registry);
+   initializeCalculateSpillWeightsPass(Registry);
++  initializeCodeGenIntrinsicPass(Registry);
+   initializeCodePlacementOptPass(Registry);
+   initializeDeadMachineInstructionElimPass(Registry);
+   initializeExpandPostRAPass(Registry);
+diff --git a/lib/CodeGen/CodeGenIntrinsic.cpp b/lib/CodeGen/CodeGenIntrinsic.cpp
+new file mode 100644
+index 0000000..01253cd
+--- /dev/null
++++ b/lib/CodeGen/CodeGenIntrinsic.cpp
+@@ -0,0 +1,229 @@
++//===-- CodeGenIntrinsic.cpp - CodeGen Intrinsic --------------------------===//
++//
++//                     The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file implements the llvm.codegen intrinsic.
++//
++//===----------------------------------------------------------------------===//
++
++#include "llvm/CodeGen/Passes.h"
++#include "llvm/CallingConv.h"
++#include "llvm/IntrinsicInst.h"
++#include "llvm/LLVMContext.h"
++#include "llvm/Module.h"
++#include "llvm/PassManager.h"
++#include "llvm/Assembly/Parser.h"
++#include "llvm/Target/TargetData.h"
++#include "llvm/Target/TargetMachine.h"
++#include "llvm/Target/TargetRegisterInfo.h"
++#include "llvm/Support/Debug.h"
++#include "llvm/Support/ErrorHandling.h"
++#include "llvm/Support/FormattedStream.h"
++#include "llvm/Support/Host.h"
++#include "llvm/Support/IRBuilder.h"
++#include "llvm/Support/raw_ostream.h"
++#include "llvm/Support/SourceMgr.h"
++#include "llvm/Support/TargetRegistry.h"
++#include "llvm/ADT/Triple.h"
++
++using namespace llvm;
++
++namespace {
++  /// ASMGenerator generates target-specific assembly code from LLVM IR.
++  class ASMGenerator {
++  public:
++    ASMGenerator() {}
++
++    /// generate - Generates a target code string from a LLVM IR Value.
++    bool generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr,
++                  std::string &ASM);
++
++  private:
++    bool getStringFromConstantExpr(Value *ConstData, std::string &Out) const;
++  };
++
++  /// CodeGenIntrinsic - This pass replaces each call to the llvm.codegen
++  /// intrinsic with a string generated by ASMGenerator.
++  class CodeGenIntrinsic : public FunctionPass {
++  public:
++    static char ID;
++
++    CodeGenIntrinsic();
++    const char *getPassName() const;
++    virtual bool runOnFunction(Function &F);
++  };
++}
++
++// -----------------------------------------------------------------------------
++static bool getTargetMachineFromModule(Module *M, const StringRef &TripleStr,
++                                       const StringRef &MCPU,
++                                       const StringRef &Features,
++                                       TargetMachine *&TM) {
++  std::string ErrMsg;
++  const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
++  if (!TheTarget) {
++    errs() << ErrMsg << "\n";
++    return false;
++  }
++
++  TargetOptions Options;
++  TM = TheTarget->createTargetMachine(TripleStr, MCPU, Features, Options);
++  assert(TM && "Could not allocate target machine!");
++  return true;
++}
++
++static bool createASMAsString(Module *New, const StringRef &Triple,
++                              const StringRef &MCPU, const StringRef &Features,
++                              std::string &ASM) {
++  TargetMachine *Target;
++  if (!getTargetMachineFromModule(New, Triple, MCPU, Features, Target)) {
++    return false;
++  }
++
++  // Build up all of the passes that we want to do to the module.
++  PassManager PM;
++
++  // Add the target data from the target machine, if it exists, or the module.
++  if (const TargetData *TD = Target->getTargetData())
++    PM.add(new TargetData(*TD));
++  else
++    PM.add(new TargetData(New));
++
++  {
++    raw_string_ostream NameROS(ASM);
++    formatted_raw_ostream FOS(NameROS);
++
++    // Ask the target to add backend passes as necessary.
++    int UseVerifier = true;
++    if (Target->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
++                                    UseVerifier)) {
++      errs() << "CodeGen Intrinsic: target does not support generation of this "
++             << "file type!\n";
++
++      return false;
++    }
++
++    PM.run(*New);
++    FOS.flush();
++  }
++
++  delete Target;
++  return true;
++}
++
++bool ASMGenerator::getStringFromConstantExpr(Value *ConstData,
++                                             std::string &Out) const {
++  bool Result = false;
++  if (ConstantExpr *U = dyn_cast<ConstantExpr>(ConstData)) {
++    Value *R = U->getOperand(0);
++    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(R)) {
++      Constant *C = GV->getInitializer();
++      if (ConstantDataArray *CA = dyn_cast<ConstantDataArray>(C)) {
++        Out = CA->getAsString();
++        Result = true;
++      }
++    }
++  }
++  return Result;
++}
++
++bool ASMGenerator::generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr,
++                            std::string &ASM) {
++  std::string Kernel;
++  if (!getStringFromConstantExpr(IRStr, Kernel))
++    return false;
++
++  std::string MCPU;
++  if (!getStringFromConstantExpr(MCPUStr, MCPU))
++    MCPU = "";
++
++  std::string Features;
++  if (!getStringFromConstantExpr(FeaturesStr, Features))
++    Features = "";
++
++  SMDiagnostic ErrorMessage;
++  LLVMContext Context;
++  std::auto_ptr<Module> TempModule(
++    ParseAssemblyString(Kernel.c_str(), 0, ErrorMessage, Context));
++
++  Triple TheTriple(TempModule->getTargetTriple());
++  const std::string TripleStr = TheTriple.getTriple();
++  if(TripleStr.empty()) {
++    errs() << "error: Target triple isn't set correctly for the new module.\n";
++    return false;
++  }
++
++  return createASMAsString(TempModule.get(), TripleStr.data(), MCPU.data(),
++                           Features.data(), ASM);
++}
++
++// -----------------------------------------------------------------------------
++INITIALIZE_PASS(CodeGenIntrinsic, "codegen-intrinsic", "CodeGen Intrinsic",
++                false, false)
++
++FunctionPass *llvm::createCodeGenIntrinsicPass() {
++  return new CodeGenIntrinsic();
++}
++
++char CodeGenIntrinsic::ID = 0;
++
++CodeGenIntrinsic::CodeGenIntrinsic()
++  : FunctionPass(ID) {
++}
++
++const char *CodeGenIntrinsic::getPassName() const {
++  return "Lowering CodeGen Intrinsic.";
++}
++
++bool CodeGenIntrinsic::runOnFunction(Function &F) {
++  bool MadeChange = false;
++  Module *M = F.getParent();
++  if (Function *CG = M->getFunction("llvm.codegen")) {
++    for (Function::use_iterator I = CG->use_begin(), E = CG->use_end();
++         I != E; ++I) {
++      if (CallInst *CI = dyn_cast<CallInst>(*I)) {
++        if (&F != CI->getParent()->getParent())
++          continue;
++
++        std::string ASM;
++        ASMGenerator *Generator = new ASMGenerator();
++        IRBuilder<> Builder(CI->getParent(), CI);
++        Value *St;
++        if (!Generator->generate(CI->getArgOperand(0), CI->getArgOperand(1),
++                                 CI->getArgOperand(2), ASM)) {
++          Type *Ty= CG->getReturnType();
++          St = Constant::getNullValue(Ty);
++        } else {
++          // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims?
++          //        Seems that, short of multithreaded LLVM, it should be safe;
++          //        all that is necessary is that a simple Module::iterator loop
++          //        not be invalidated. Appending to the GlobalVariable list is
++          //        safe in that sense.
++          //
++          //        All the output passes emit globals last. The ExecutionEngine
++          //        explicitly supports adding globals to the module after
++          //        initialization.
++          //
++          //        Still, if it isn't deemed acceptable, then this
++          //        transformation needs to be a ModulePass (which means it
++          //        cannot be in the  'llc' pipeline  (which uses a
++          //        FunctionPassManager (which segfaults (not asserts) if
++          //        provided a ModulePass))).
++          St = Builder.CreateGlobalStringPtr(ASM, "ASM");
++        }
++        CI->replaceAllUsesWith(St);
++        CI->eraseFromParent();
++        // We should erase the unused globals from current module. But we
++        // can't do this within a FunctionPass.
++        MadeChange = true;
++      }
++    }
++  }
++
++  return MadeChange;
++}
+diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
+index 490547b..95b6e0c 100644
+--- a/lib/CodeGen/Passes.cpp
++++ b/lib/CodeGen/Passes.cpp
+@@ -305,6 +305,9 @@ void TargetPassConfig::addIRPasses() {
+ 
+   PM->add(createGCLoweringPass());
+ 
++  // Generate target code for embedded LLVM-IR strings.
++  PM->add(createCodeGenIntrinsicPass());
++
+   // Make sure that no unreachable blocks are instruction selected.
+   PM->add(createUnreachableBlockEliminationPass());
+ }
+diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+index f1b4d80..d87986c 100644
+--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
++++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+@@ -5131,6 +5131,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+   case Intrinsic::lifetime_end:
+     // Discard region information.
+     return 0;
++
++  case Intrinsic::codegen:
++    llvm_unreachable("failed to lower codegen intrinsic!");
+   }
+ }
+ 
+diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
+index 045ab9e..d9bd19f 100644
+--- a/lib/Target/LLVMBuild.txt
++++ b/lib/Target/LLVMBuild.txt
+@@ -45,7 +45,7 @@ parent = Libraries
+ type = Library
+ name = Target
+ parent = Libraries
+-required_libraries = Core MC Support
++required_libraries = Core MC Support AsmParser
+ 
+ ; This is a special group whose required libraries are extended (by llvm-build)
+ ; with every built target, which makes it easy for tools to include every
+diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
+index f11efff..1031685 100644
+--- a/lib/VMCore/Verifier.cpp
++++ b/lib/VMCore/Verifier.cpp
+@@ -1783,6 +1783,16 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
+     Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+             "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+     break;
++  case Intrinsic::codegen:
++    Assert1(isa<ConstantExpr>(CI.getArgOperand(0)),
++            "llvm.codegen parameter #1 must be a constant expression", &CI);
++    Assert1(isa<ConstantExpr>(CI.getArgOperand(1)) ||
++            isa<ConstantPointerNull>(CI.getArgOperand(1)),
++            "llvm.codegen parameter #2 must be a constant expression", &CI);
++    Assert1(isa<ConstantExpr>(CI.getArgOperand(2)) ||
++            isa<ConstantPointerNull>(CI.getArgOperand(2)),
++            "llvm.codegen parameter #3 must be a constant expression", &CI);
++    break;
+   }
+ }
+ 
+diff --git a/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
+new file mode 100644
+index 0000000..768790d
+--- /dev/null
++++ b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
+@@ -0,0 +1,28 @@
++; RUN: llc < %s -march=x86 | FileCheck %s
++
++; ModuleID = 'embedded-codegen-ptx.ll'
++target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
++target triple = "i386-pc-linux-gnu"
++
++@llvm_kernel = private unnamed_addr constant [1937 x i8] c"target triple = \22ptx32-pc-linux-gnu\22\0A\0Adefine internal ptx_kernel void @gpu_codegen.ptx_subfn(i8* %ptx.Array) {\0Aptx.setup:\0A  %0 = bitcast i8* %ptx.Array to [128 x [128 x i32]]*\0A  %1 = call i32 @llvm.ptx.read.nctaid.x()\0A  %2 = zext i32 %1 to i64\0A  %3 = call i32 @llvm.ptx.read.nctaid.y()\0A  %4 = zext i32 %3 to i64\0A  %5 = call i32 @llvm.ptx.read.ntid.x()\0A  %6 = zext i32 %5 to i64\0A  %7 = call i32 @llvm.ptx.read.ntid.y()\0A  %8 = zext i32 %7 to i64\0A  %9 = call i32 @llvm.ptx.read.ctaid.x()\0A  %10 = zext i32 %9 to i64\0A  %11 = call i32 @llvm.ptx.read.ctaid.y()\0A  %12 = zext i32 %11 to i64\0A  %13 = call i32 @llvm.ptx.read.tid.x()\0A  %14 = zext i32 %13 to i64\0A  %15 = call i32 @llvm.ptx.read.tid.y()\0A  %16 = zext i32 %15 to i64\0A  br label %ptx.loop_body\0A\0Aptx.exit:                                         ; preds = %polly.stmt.for.body3\0A  ret void\0A\0Aptx.loop_body:                                    ; preds = %ptx.setup\0A  %p_gpu_index_i = mul i64 %12, %2\0A  %17 = add i64 %p_gpu_index_i, %10\0A  %p_gpu_index_j = mul i64 %16, %6\0A  %18 = add i64 %p_gpu_index_j, %14\0A  br label %polly.stmt.for.body3\0A\0Apolly.stmt.for.body3:                             ; preds = %ptx.loop_body\0A  %19 = trunc i64 %17 to i32\0A  %p_mul = shl nsw i32 %19, 7\0A  %20 = trunc i64 %18 to i32\0A  %p_add = add nsw i32 %p_mul, %20\0A  %21 = trunc i64 %17 to i32\0A  %22 = trunc i64 %18 to i32\0A  %p_arrayidx4 = getelementptr inbounds [128 x [128 x i32]]* %0, i32 0, i32 %21, i32 %22\0A  store i32 %p_add, i32* %p_arrayidx4\0A  br label %ptx.exit\0A}\0A\0Adeclare i32 @llvm.ptx.read.nctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.nctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.y() nounwind readnone\0A\00"
++
++@.str = private unnamed_addr constant [3 x i8] c"%s\00", align 1
++
++define i32 @gpu_codegen() nounwind {
++entry:
++  %0 = call i8* @llvm.codegen(i8* getelementptr inbounds ([1937 x i8]* @llvm_kernel, i32 0, i32 0), i8* null, i8* null)
++  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0)
++  ret i32 0
++}
++
++define i32 @main() nounwind {
++entry:
++  %call = call i32 @gpu_codegen()
++  ret i32 0
++}
++
++declare i8* @llvm.codegen(i8*, i8*, i8*) nounwind
++
++declare i32 @printf(i8*, ...) nounwind
++
++; CHECK: .entry gpu_codegen_2E_ptx_subfn (.param .b32 __param_1)
+diff --git a/test/CodeGen/X86/EmbeddedCG/lit.local.cfg b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg
+new file mode 100644
+index 0000000..346ffa1
+--- /dev/null
++++ b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg
+@@ -0,0 +1,5 @@
++config.suffixes = ['.ll', '.c', '.cpp']
++
++targets = set(config.root.targets_to_build.split())
++if not 'PTX' in targets:
++    config.unsupported = True
+-- 
+1.7.6.5
+





More information about the llvm-commits mailing list