<div dir="ltr"><div>Thanks for the quick fix, Justin!</div><div><br></div><div><br></div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Nov 5, 2014 at 10:19 AM, Justin Holewinski <span dir="ltr"><<a href="mailto:jholewinski@nvidia.com" target="_blank">jholewinski@nvidia.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: jholewinski<br>
Date: Wed Nov 5 12:19:30 2014<br>
New Revision: 221377<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=221377&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=221377&view=rev</a><br>
Log:<br>
[NVPTX] Add NVPTXLowerStructArgs pass<br>
<br>
This works around the limitation that PTX does not allow .param space<br>
loads/stores with arbitrary pointers.<br>
<br>
If a function has a by-val struct ptr arg, say foo(%struct.x *byval %d), then<br>
add the following instructions to the first basic block :<br>
<br>
%temp = alloca %struct.x, align 8<br>
%tt1 = bitcast %struct.x * %d to i8 *<br>
%tt2 = llvm.nvvm.cvt.gen.to.param %tt1<br>
%tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) *<br>
%tv = load %struct.x addrspace(101) * %tempd<br>
store %struct.x %tv, %struct.x * %temp, align 8<br>
<br>
The above code allocates some space in the stack and copies the incoming<br>
struct from param space to local space. Then replace all occurrences of %d<br>
by %temp.<br>
<br>
Fixes PR21465.<br>
<br>
Added:<br>
llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp<br>
llvm/trunk/test/CodeGen/NVPTX/bug21465.ll<br>
Modified:<br>
llvm/trunk/lib/Target/NVPTX/CMakeLists.txt<br>
llvm/trunk/lib/Target/NVPTX/NVPTX.h<br>
llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp<br>
<br>
Modified: llvm/trunk/lib/Target/NVPTX/CMakeLists.txt<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/CMakeLists.txt?rev=221377&r1=221376&r2=221377&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/CMakeLists.txt?rev=221377&r1=221376&r2=221377&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/NVPTX/CMakeLists.txt (original)<br>
+++ llvm/trunk/lib/Target/NVPTX/CMakeLists.txt Wed Nov 5 12:19:30 2014<br>
@@ -29,6 +29,7 @@ set(NVPTXCodeGen_sources<br>
NVPTXMCExpr.cpp<br>
NVPTXReplaceImageHandles.cpp<br>
NVPTXImageOptimizer.cpp<br>
+ NVPTXLowerStructArgs.cpp<br>
)<br>
<br>
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})<br>
<br>
Modified: llvm/trunk/lib/Target/NVPTX/NVPTX.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTX.h?rev=221377&r1=221376&r2=221377&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTX.h?rev=221377&r1=221376&r2=221377&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/NVPTX/NVPTX.h (original)<br>
+++ llvm/trunk/lib/Target/NVPTX/NVPTX.h Wed Nov 5 12:19:30 2014<br>
@@ -69,6 +69,7 @@ ModulePass *createNVVMReflectPass(const<br>
MachineFunctionPass *createNVPTXPrologEpilogPass();<br>
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();<br>
FunctionPass *createNVPTXImageOptimizerPass();<br>
+FunctionPass *createNVPTXLowerStructArgsPass();<br>
<br>
bool isImageOrSamplerVal(const Value *, const Module *);<br>
<br>
<br>
Added: llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp?rev=221377&view=auto" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp?rev=221377&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp (added)<br>
+++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp Wed Nov 5 12:19:30 2014<br>
@@ -0,0 +1,150 @@<br>
+//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===//<br>
+//<br>
+// The LLVM Compiler Infrastructure<br>
+//<br>
+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>
+//<br>
+//===----------------------------------------------------------------------===//<br>
+//<br>
+// Copy struct args to local memory. This is needed for kernel functions only.<br>
+// This is a preparation for handling cases like<br>
+//<br>
+// kernel void foo(struct A arg, ...)<br>
+// {<br>
+// struct A *p = &arg;<br>
+// ...<br>
+// ... = p->field1 ... (there is no generic address for .param)<br>
+// p->field2 = ... (there is no write access to .param)<br>
+// }<br>
+//<br>
+//===----------------------------------------------------------------------===//<br>
+<br>
+#include "NVPTX.h"<br>
+#include "NVPTXUtilities.h"<br>
+#include "llvm/IR/Function.h"<br>
+#include "llvm/IR/Instructions.h"<br>
+#include "llvm/IR/IntrinsicInst.h"<br>
+#include "llvm/IR/Module.h"<br>
+#include "llvm/IR/Type.h"<br>
+#include "llvm/Pass.h"<br>
+<br>
+using namespace llvm;<br>
+<br>
+namespace llvm {<br>
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);<br>
+}<br>
+<br>
+class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {<br>
+ bool runOnFunction(Function &F) override;<br>
+<br>
+ void handleStructPtrArgs(Function &);<br>
+ void handleParam(Argument *);<br>
+<br>
+public:<br>
+ static char ID; // Pass identification, replacement for typeid<br>
+ NVPTXLowerStructArgs() : FunctionPass(ID) {}<br>
+ const char *getPassName() const override {<br>
+ return "Copy structure (byval *) arguments to stack";<br>
+ }<br>
+};<br>
+<br>
+char NVPTXLowerStructArgs::ID = 1;<br>
+<br>
+INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",<br>
+ "Lower structure arguments (NVPTX)", false, false)<br>
+<br>
+void NVPTXLowerStructArgs::handleParam(Argument *Arg) {<br>
+ Function *Func = Arg->getParent();<br>
+ Instruction *FirstInst = &(Func->getEntryBlock().front());<br>
+ const PointerType *PType = dyn_cast<PointerType>(Arg->getType());<br>
+<br>
+ assert(PType && "Expecting pointer type in handleParam");<br>
+<br>
+ const Type *StructType = PType->getElementType();<br>
+<br>
+ AllocaInst *AllocA =<br>
+ new AllocaInst((Type *)StructType, Arg->getName(), FirstInst);<br>
+<br>
+ /* Set the alignment to alignment of the byval parameter. This is because,<br>
+ * later load/stores assume that alignment, and we are going to replace<br>
+ * the use of the byval parameter with this alloca instruction.<br>
+ */<br>
+ AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));<br>
+<br>
+ Arg->replaceAllUsesWith(AllocA);<br>
+<br>
+ // Get the cvt.gen.to.param intrinsic<br>
+ const Type *CvtTypes[2] = {<br>
+ Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),<br>
+ Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_GENERIC)<br>
+ };<br>
+ Function *CvtFunc = (Function *)Intrinsic::getDeclaration(<br>
+ Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param,<br>
+ ArrayRef<Type *>((Type **)CvtTypes, 2));<br>
+ std::vector<Value *> BC1;<br>
+ BC1.push_back(<br>
+ new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),<br>
+ ADDRESS_SPACE_GENERIC),<br>
+ Arg->getName(), FirstInst));<br>
+ CallInst *CallCVT = CallInst::Create(CvtFunc, ArrayRef<Value *>(BC1),<br>
+ "cvt_to_param", FirstInst);<br>
+<br>
+ BitCastInst *BitCast = new BitCastInst(<br>
+ CallCVT, PointerType::get((Type *)StructType, ADDRESS_SPACE_PARAM),<br>
+ Arg->getName(), FirstInst);<br>
+ LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);<br>
+ new StoreInst(LI, AllocA, FirstInst);<br>
+}<br>
+<br>
+/// =============================================================================<br>
+/// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then<br>
+/// add the following instructions to the first basic block :<br>
+///<br>
+/// %temp = alloca %struct.x, align 8<br>
+/// %tt1 = bitcast %struct.x * %d to i8 *<br>
+/// %tt2 = llvm.nvvm.cvt.gen.to.param %tt1<br>
+/// %tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) *<br>
+/// %tv = load %struct.x addrspace(101) * %tempd<br>
+/// store %struct.x %tv, %struct.x * %temp, align 8<br>
+///<br>
+/// The above code allocates some space in the stack and copies the incoming<br>
+/// struct from param space to local space.<br>
+/// Then replace all occurrences of %d by %temp.<br>
+/// =============================================================================<br>
+void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {<br>
+ const AttributeSet &PAL = F.getAttributes();<br>
+<br>
+ unsigned Idx = 1;<br>
+<br>
+ for (Argument &Arg : F.args()) {<br>
+ const Type *Ty = Arg.getType();<br>
+<br>
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);<br>
+<br>
+ if (PTy) {<br>
+ if (PAL.hasAttribute(Idx, Attribute::ByVal)) {<br>
+ // cout << "Has struct ptr args" << std::endl;<br>
+ handleParam(&Arg);<br>
+ }<br>
+ }<br>
+ Idx++;<br>
+ }<br>
+}<br>
+<br>
+/// =============================================================================<br>
+/// Main function for this pass.<br>
+/// =============================================================================<br>
+bool NVPTXLowerStructArgs::runOnFunction(Function &F) {<br>
+ // Skip non-kernels. See the comments at the top of this file.<br>
+ if (!isKernelFunction(F))<br>
+ return false;<br>
+<br>
+ handleStructPtrArgs(F);<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+FunctionPass *llvm::createNVPTXLowerStructArgsPass() {<br>
+ return new NVPTXLowerStructArgs();<br>
+}<br>
<br>
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=221377&r1=221376&r2=221377&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=221377&r1=221376&r2=221377&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp (original)<br>
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp Wed Nov 5 12:19:30 2014<br>
@@ -50,6 +50,7 @@ void initializeNVVMReflectPass(PassRegis<br>
void initializeGenericToNVVMPass(PassRegistry&);<br>
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);<br>
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);<br>
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);<br>
}<br>
<br>
extern "C" void LLVMInitializeNVPTXTarget() {<br>
@@ -64,6 +65,7 @@ extern "C" void LLVMInitializeNVPTXTarge<br>
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());<br>
initializeNVPTXFavorNonGenericAddrSpacesPass(<br>
*PassRegistry::getPassRegistry());<br>
+ initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());<br>
}<br>
<br>
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,<br>
<br>
Added: llvm/trunk/test/CodeGen/NVPTX/bug21465.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug21465.ll?rev=221377&view=auto" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug21465.ll?rev=221377&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/NVPTX/bug21465.ll (added)<br>
+++ llvm/trunk/test/CodeGen/NVPTX/bug21465.ll Wed Nov 5 12:19:30 2014<br>
@@ -0,0 +1,24 @@<br>
+; RUN: opt < %s -nvptx-lower-struct-args -S | FileCheck %s<br>
+<br>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"<br>
+target triple = "nvptx64-unknown-unknown"<br>
+<br>
+%struct.S = type { i32, i32 }<br>
+<br>
+; Function Attrs: nounwind<br>
+define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly %input, i32* nocapture %output) #0 {<br>
+entry:<br>
+; CHECK-LABEL @_Z22TakesStruct1SPi<br>
+; CHECK: bitcast %struct.S* %input to i8*<br>
+; CHECK: call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8<br>
+ %b = getelementptr inbounds %struct.S* %input, i64 0, i32 1<br>
+ %0 = load i32* %b, align 4<br>
+ store i32 %0, i32* %output, align 4<br>
+ ret void<br>
+}<br>
+<br>
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }<br>
+<br>
+!nvvm.annotations = !{!0}<br>
+<br>
+!0 = metadata !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, metadata !"kernel", i32 1}<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>