[llvm-commits] [llvm] r168198 - in /llvm/trunk: lib/Target/NVPTX/NVPTXAsmPrinter.cpp test/CodeGen/NVPTX/global-ordering.ll
Justin Holewinski
justin.holewinski at gmail.com
Sun Nov 18 05:09:04 PST 2012
As per ML thread:
I am the code owner for NVPTX and I approve this patch.
On Fri, Nov 16, 2012 at 10:27 PM, Pawel Wodnicki <pawel at 32bitmicro.com> wrote:
> Justin,
>
> > One more for 3.2. :)
>
> I will queue it up but merging into the 3.2 is for now suspended
> as per:
>
> http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-November/055895.html
>
> Pawel
>
>
> >
> >
> > On Fri, Nov 16, 2012 at 4:03 PM, Justin Holewinski
> > <jholewinski at nvidia.com> wrote:
> >
> >> Author: jholewinski
> >> Date: Fri Nov 16 15:03:51 2012
> >> New Revision: 168198
> >>
> >> URL: http://llvm.org/viewvc/llvm-project?rev=168198&view=rev
> >> Log:
> >> [NVPTX] Order global variables in def-use order before emitting them in
> the
> >> final assembly
> >>
> >> Added:
> >> llvm/trunk/test/CodeGen/NVPTX/global-ordering.ll
> >> Modified:
> >> llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
> >>
> >> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
> >> URL:
> >>
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp?rev=168198&r1=168197&r2=168198&view=diff
> >>
> >>
> ==============================================================================
> >> --- llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp (original)
> >> +++ llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp Fri Nov 16 15:03:51
> >> 2012
> >> @@ -68,7 +68,54 @@
> >>
> >> cl::location(llvm::InterleaveSrcInPtx));
> >>
> >>
> >> +namespace {
> >> +/// DiscoverDependentGlobals - Return a set of GlobalVariables on which
> >> \p V
> >> +/// depends.
> >> +void DiscoverDependentGlobals(Value *V,
> >> + DenseSet<GlobalVariable*> &Globals) {
> >> + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
> >> + Globals.insert(GV);
> >> + else {
> >> + if (User *U = dyn_cast<User>(V)) {
> >> + for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
> >> + DiscoverDependentGlobals(U->getOperand(i), Globals);
> >> + }
> >> + }
> >> + }
> >> +}
> >>
> >> +/// VisitGlobalVariableForEmission - Add \p GV to the list of
> >> GlobalVariable
> >> +/// instances to be emitted, but only after any dependents have been
> added
> >> +/// first.
> >> +void VisitGlobalVariableForEmission(GlobalVariable *GV,
> >> + SmallVectorImpl<GlobalVariable*>
> >> &Order,
> >> + DenseSet<GlobalVariable*> &Visited,
> >> + DenseSet<GlobalVariable*>
> &Visiting) {
> >> + // Have we already visited this one?
> >> + if (Visited.count(GV)) return;
> >> +
> >> + // Do we have a circular dependency?
> >> + if (Visiting.count(GV))
> >> + report_fatal_error("Circular dependency found in global variable
> >> set");
> >> +
> >> + // Start visiting this global
> >> + Visiting.insert(GV);
> >> +
> >> + // Make sure we visit all dependents first
> >> + DenseSet<GlobalVariable*> Others;
> >> + for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
> >> + DiscoverDependentGlobals(GV->getOperand(i), Others);
> >> +
> >> + for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
> >> + E = Others.end(); I != E; ++I)
> >> + VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
> >> +
> >> + // Now we can visit ourself
> >> + Order.push_back(GV);
> >> + Visited.insert(GV);
> >> + Visiting.erase(GV);
> >> +}
> >> +}
> >>
> >> // @TODO: This is a copy from AsmPrinter.cpp. The function is static,
> so
> >> we
> >> // cannot just link to the existing version.
> >> @@ -893,10 +940,27 @@
> >>
> >> emitDeclarations(M, OS2);
> >>
> >> - // Print out module-level global variables here.
> >> + // As ptxas does not support forward references of globals, we need
> to
> >> first
> >> + // sort the list of module-level globals in def-use order. We visit
> each
> >> + // global variable in order, and ensure that we emit it *after* its
> >> dependent
> >> + // globals. We use a little extra memory maintaining both a set and a
> >> list to
> >> + // have fast searches while maintaining a strict ordering.
> >> + SmallVector<GlobalVariable*,8> Globals;
> >> + DenseSet<GlobalVariable*> GVVisited;
> >> + DenseSet<GlobalVariable*> GVVisiting;
> >> +
> >> + // Visit each global variable, in order
> >> for (Module::global_iterator I = M.global_begin(), E =
> M.global_end();
> >> - I != E; ++I)
> >> - printModuleLevelGV(I, OS2);
> >> + I != E; ++I)
> >> + VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
> >> +
> >> + assert(GVVisited.size() == M.getGlobalList().size() &&
> >> + "Missed a global variable");
> >> + assert(GVVisiting.size() == 0 && "Did not fully process a global
> >> variable");
> >> +
> >> + // Print out module-level global variables in proper order
> >> + for (unsigned i = 0, e = Globals.size(); i != e; ++i)
> >> + printModuleLevelGV(Globals[i], OS2);
> >>
> >> OS2 << '\n';
> >>
> >>
> >> Added: llvm/trunk/test/CodeGen/NVPTX/global-ordering.ll
> >> URL:
> >>
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/global-ordering.ll?rev=168198&view=auto
> >>
> >>
> ==============================================================================
> >> --- llvm/trunk/test/CodeGen/NVPTX/global-ordering.ll (added)
> >> +++ llvm/trunk/test/CodeGen/NVPTX/global-ordering.ll Fri Nov 16 15:03:51
> >> 2012
> >> @@ -0,0 +1,20 @@
> >> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
> >> --check-prefix=PTX32
> >> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
> >> --check-prefix=PTX64
> >> +
> >> +; Make sure we emit these globals in def-use order
> >> +
> >> +
> >> +; PTX32: .visible .global .align 1 .u8 a = 2;
> >> +; PTX32-NEXT: .visible .global .align 4 .u32 a2 = a;
> >> +; PTX64: .visible .global .align 1 .u8 a = 2;
> >> +; PTX64-NEXT: .visible .global .align 8 .u64 a2 = a;
> >> +@a2 = addrspace(1) global i8 addrspace(1)* @a
> >> +@a = addrspace(1) global i8 2
> >> +
> >> +
> >> +; PTX32: .visible .global .align 1 .u8 b = 1;
> >> +; PTX32-NEXT: .visible .global .align 4 .u32 b2[2] = {b, b};
> >> +; PTX64: .visible .global .align 1 .u8 b = 1;
> >> +; PTX64-NEXT: .visible .global .align 8 .u64 b2[2] = {b, b};
> >> +@b2 = addrspace(1) global [2 x i8 addrspace(1)*] [i8 addrspace(1)* @b,
> i8
> >> addrspace(1)* @b]
> >> +@b = addrspace(1) global i8 1
> >>
> >>
> >> _______________________________________________
> >> llvm-commits mailing list
> >> llvm-commits at cs.uiuc.edu
> >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> >>
> >
> >
> >
>
>
--
Thanks,
Justin Holewinski
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20121118/a43ad761/attachment.html>
More information about the llvm-commits
mailing list