[llvm] r221377 - [NVPTX] Add NVPTXLowerStructArgs pass
Eli Bendersky
eliben at google.com
Wed Nov 5 10:50:14 PST 2014
Thanks for the quick fix, Justin!
On Wed, Nov 5, 2014 at 10:19 AM, Justin Holewinski <jholewinski at nvidia.com>
wrote:
> Author: jholewinski
> Date: Wed Nov 5 12:19:30 2014
> New Revision: 221377
>
> URL: http://llvm.org/viewvc/llvm-project?rev=221377&view=rev
> Log:
> [NVPTX] Add NVPTXLowerStructArgs pass
>
> This works around the limitation that PTX does not allow .param space
> loads/stores with arbitrary pointers.
>
> If a function has a by-val struct ptr arg, say foo(%struct.x *byval %d),
> then add the following instructions to the first basic block:
>
> %temp = alloca %struct.x, align 8
> %tt1 = bitcast %struct.x * %d to i8 *
> %tt2 = llvm.nvvm.cvt.gen.to.param %tt1
> %tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) *
> %tv = load %struct.x addrspace(101) * %tempd
> store %struct.x %tv, %struct.x * %temp, align 8
>
> The above code allocates some space in the stack and copies the incoming
> struct from param space to local space. Then replace all occurrences of %d
> by %temp.
>
> Fixes PR21465.
>
> Added:
> llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
> llvm/trunk/test/CodeGen/NVPTX/bug21465.ll
> Modified:
> llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
> llvm/trunk/lib/Target/NVPTX/NVPTX.h
> llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
>
> Modified: llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/CMakeLists.txt?rev=221377&r1=221376&r2=221377&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Target/NVPTX/CMakeLists.txt Wed Nov 5 12:19:30 2014
> @@ -29,6 +29,7 @@ set(NVPTXCodeGen_sources
> NVPTXMCExpr.cpp
> NVPTXReplaceImageHandles.cpp
> NVPTXImageOptimizer.cpp
> + NVPTXLowerStructArgs.cpp
> )
>
> add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTX.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTX.h?rev=221377&r1=221376&r2=221377&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTX.h (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTX.h Wed Nov 5 12:19:30 2014
> @@ -69,6 +69,7 @@ ModulePass *createNVVMReflectPass(const
> MachineFunctionPass *createNVPTXPrologEpilogPass();
> MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
> FunctionPass *createNVPTXImageOptimizerPass();
> +FunctionPass *createNVPTXLowerStructArgsPass();
>
> bool isImageOrSamplerVal(const Value *, const Module *);
>
>
> Added: llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp?rev=221377&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp (added)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp Wed Nov 5
> 12:19:30 2014
> @@ -0,0 +1,150 @@
> +//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory
> =====--===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +// Copy struct args to local memory. This is needed for kernel functions
> only.
> +// This is a preparation for handling cases like
> +//
> +// kernel void foo(struct A arg, ...)
> +// {
> +// struct A *p = &arg;
> +// ...
> +// ... = p->filed1 ... (this is no generic address for .param)
> +// p->filed2 = ... (this is no write access to .param)
> +// }
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#include "NVPTX.h"
> +#include "NVPTXUtilities.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/Module.h"
> +#include "llvm/IR/Type.h"
> +#include "llvm/Pass.h"
> +
> +using namespace llvm;
> +
> +namespace llvm {
> +void initializeNVPTXLowerStructArgsPass(PassRegistry &);
> +}
> +
> +class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
> + bool runOnFunction(Function &F) override;
> +
> + void handleStructPtrArgs(Function &);
> + void handleParam(Argument *);
> +
> +public:
> + static char ID; // Pass identification, replacement for typeid
> + NVPTXLowerStructArgs() : FunctionPass(ID) {}
> + const char *getPassName() const override {
> + return "Copy structure (byval *) arguments to stack";
> + }
> +};
> +
> +char NVPTXLowerStructArgs::ID = 1;
> +
> +INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",
> + "Lower structure arguments (NVPTX)", false, false)
> +
> +void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
> + Function *Func = Arg->getParent();
> + Instruction *FirstInst = &(Func->getEntryBlock().front());
> + const PointerType *PType = dyn_cast<PointerType>(Arg->getType());
> +
> + assert(PType && "Expecting pointer type in handleParam");
> +
> + const Type *StructType = PType->getElementType();
> +
> + AllocaInst *AllocA =
> + new AllocaInst((Type *)StructType, Arg->getName(), FirstInst);
> +
> + /* Set the alignment to alignment of the byval parameter. This is
> because,
> + * later load/stores assume that alignment, and we are going to replace
> + * the use of the byval parameter with this alloca instruction.
> + */
> + AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
> +
> + Arg->replaceAllUsesWith(AllocA);
> +
> + // Get the cvt.gen.to.param intrinsic
> + const Type *CvtTypes[2] = {
> + Type::getInt8PtrTy(Func->getParent()->getContext(),
> ADDRESS_SPACE_PARAM),
> + Type::getInt8PtrTy(Func->getParent()->getContext(),
> ADDRESS_SPACE_GENERIC)
> + };
> + Function *CvtFunc = (Function *)Intrinsic::getDeclaration(
> + Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param,
> + ArrayRef<Type *>((Type **)CvtTypes, 2));
> + std::vector<Value *> BC1;
> + BC1.push_back(
> + new BitCastInst(Arg,
> Type::getInt8PtrTy(Func->getParent()->getContext(),
> + ADDRESS_SPACE_GENERIC),
> + Arg->getName(), FirstInst));
> + CallInst *CallCVT = CallInst::Create(CvtFunc, ArrayRef<Value *>(BC1),
> + "cvt_to_param", FirstInst);
> +
> + BitCastInst *BitCast = new BitCastInst(
> + CallCVT, PointerType::get((Type *)StructType, ADDRESS_SPACE_PARAM),
> + Arg->getName(), FirstInst);
> + LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
> + new StoreInst(LI, AllocA, FirstInst);
> +}
> +
> +///
> =============================================================================
> +/// If the function had a struct ptr arg, say foo(%struct.x *byval %d),
> then
> +/// add the following instructions to the first basic block :
> +///
> +/// %temp = alloca %struct.x, align 8
> +/// %tt1 = bitcast %struct.x * %d to i8 *
> +/// %tt2 = llvm.nvvm.cvt.gen.to.param %tt2
> +/// %tempd = bitcast i8 addrspace(101) * to %struct.x addrspace(101) *
> +/// %tv = load %struct.x addrspace(101) * %tempd
> +/// store %struct.x %tv, %struct.x * %temp, align 8
> +///
> +/// The above code allocates some space in the stack and copies the
> incoming
> +/// struct from param space to local space.
> +/// Then replace all occurences of %d by %temp.
> +///
> =============================================================================
> +void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {
> + const AttributeSet &PAL = F.getAttributes();
> +
> + unsigned Idx = 1;
> +
> + for (Argument &Arg : F.args()) {
> + const Type *Ty = Arg.getType();
> +
> + const PointerType *PTy = dyn_cast<PointerType>(Ty);
> +
> + if (PTy) {
> + if (PAL.hasAttribute(Idx, Attribute::ByVal)) {
> + // cout << "Has struct ptr args" << std::endl;
> + handleParam(&Arg);
> + }
> + }
> + Idx++;
> + }
> +}
> +
> +///
> =============================================================================
> +/// Main function for this pass.
> +///
> =============================================================================
> +bool NVPTXLowerStructArgs::runOnFunction(Function &F) {
> + // Skip non-kernels. See the comments at the top of this file.
> + if (!isKernelFunction(F))
> + return false;
> +
> + handleStructPtrArgs(F);
> +
> + return true;
> +}
> +
> +FunctionPass *llvm::createNVPTXLowerStructArgsPass() {
> + return new NVPTXLowerStructArgs();
> +}
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=221377&r1=221376&r2=221377&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp Wed Nov 5 12:19:30
> 2014
> @@ -50,6 +50,7 @@ void initializeNVVMReflectPass(PassRegis
> void initializeGenericToNVVMPass(PassRegistry&);
> void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
> void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
> +void initializeNVPTXLowerStructArgsPass(PassRegistry &);
> }
>
> extern "C" void LLVMInitializeNVPTXTarget() {
> @@ -64,6 +65,7 @@ extern "C" void LLVMInitializeNVPTXTarge
>
> initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
> initializeNVPTXFavorNonGenericAddrSpacesPass(
> *PassRegistry::getPassRegistry());
> + initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
> }
>
> NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
>
> Added: llvm/trunk/test/CodeGen/NVPTX/bug21465.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug21465.ll?rev=221377&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/bug21465.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/bug21465.ll Wed Nov 5 12:19:30 2014
> @@ -0,0 +1,24 @@
> +; RUN: opt < %s -nvptx-lower-struct-args -S | FileCheck %s
> +
> +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
> +target triple = "nvptx64-unknown-unknown"
> +
> +%struct.S = type { i32, i32 }
> +
> +; Function Attrs: nounwind
> +define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly
> %input, i32* nocapture %output) #0 {
> +entry:
> +; CHECK-LABEL @_Z22TakesStruct1SPi
> +; CHECK: bitcast %struct.S* %input to i8*
> +; CHECK: call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8
> + %b = getelementptr inbounds %struct.S* %input, i64 0, i32 1
> + %0 = load i32* %b, align 4
> + store i32 %0, i32* %output, align 4
> + ret void
> +}
> +
> +attributes #0 = { nounwind "less-precise-fpmad"="false"
> "no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
> "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
> "unsafe-fp-math"="false" "use-soft-float"="false" }
> +
> +!nvvm.annotations = !{!0}
> +
> +!0 = metadata !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, metadata
> !"kernel", i32 1}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141105/090eed7d/attachment.html>
More information about the llvm-commits
mailing list