[llvm] r221377 - [NVPTX] Add NVPTXLowerStructArgs pass

Justin Holewinski jholewinski at nvidia.com
Wed Nov 5 10:19:31 PST 2014


Author: jholewinski
Date: Wed Nov  5 12:19:30 2014
New Revision: 221377

URL: http://llvm.org/viewvc/llvm-project?rev=221377&view=rev
Log:
[NVPTX] Add NVPTXLowerStructArgs pass

This works around the limitation that PTX does not allow .param space
loads/stores with arbitrary pointers.

If a function has a by-val struct ptr arg, say foo(%struct.x *byval %d), then
add the following instructions to the first basic block :

%temp = alloca %struct.x, align 8
%tt1 = bitcast %struct.x * %d to i8 *
%tt2 = llvm.nvvm.ptr.gen.to.param %tt1
%tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) *
%tv = load %struct.x addrspace(101) * %tempd
store %struct.x %tv, %struct.x * %temp, align 8

The above code allocates some space on the stack and copies the incoming
struct from param space to local space. Then replace all occurrences of %d
by %temp.

Fixes PR21465.

Added:
    llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
    llvm/trunk/test/CodeGen/NVPTX/bug21465.ll
Modified:
    llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
    llvm/trunk/lib/Target/NVPTX/NVPTX.h
    llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Modified: llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/CMakeLists.txt?rev=221377&r1=221376&r2=221377&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/NVPTX/CMakeLists.txt Wed Nov  5 12:19:30 2014
@@ -29,6 +29,7 @@ set(NVPTXCodeGen_sources
   NVPTXMCExpr.cpp
   NVPTXReplaceImageHandles.cpp
   NVPTXImageOptimizer.cpp
+  NVPTXLowerStructArgs.cpp
   )
 
 add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})

Modified: llvm/trunk/lib/Target/NVPTX/NVPTX.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTX.h?rev=221377&r1=221376&r2=221377&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTX.h (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTX.h Wed Nov  5 12:19:30 2014
@@ -69,6 +69,7 @@ ModulePass *createNVVMReflectPass(const
 MachineFunctionPass *createNVPTXPrologEpilogPass();
 MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
 FunctionPass *createNVPTXImageOptimizerPass();
+FunctionPass *createNVPTXLowerStructArgsPass();
 
 bool isImageOrSamplerVal(const Value *, const Module *);
 

Added: llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp?rev=221377&view=auto
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp (added)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp Wed Nov  5 12:19:30 2014
@@ -0,0 +1,150 @@
+//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Copy struct args to local memory. This is needed for kernel functions only.
+// This is a preparation for handling cases like
+//
+// kernel void foo(struct A arg, ...)
+// {
+//     struct A *p = &arg;
+//     ...
+//     ... = p->field1 ...  (there is no generic address for .param)
+//     p->field2 = ...      (there is no write access to .param)
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm { // forward declaration of this pass's registry initializer
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);
+} // namespace llvm
+// Function pass that copies byval pointer arguments of kernels to the stack.
+class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+  bool runOnFunction(Function &F) override;
+  // Helpers: scan F's arguments / rewrite one byval pointer argument.
+  void handleStructPtrArgs(Function &);
+  void handleParam(Argument *);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  NVPTXLowerStructArgs() : FunctionPass(ID) {}
+  const char *getPassName() const override {
+    return "Copy structure (byval *) arguments to stack";
+  }
+};
+
+char NVPTXLowerStructArgs::ID = 1; // definition of the static pass ID
+// Registers the pass; "nvptx-lower-struct-args" is its command-line name.
+INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",
+                "Lower structure arguments (NVPTX)", false, false)
+// Copy the byval pointee of Arg from param space into a new entry-block alloca.
+void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
+  Function *Func = Arg->getParent();
+  Instruction *FirstInst = &(Func->getEntryBlock().front());
+  const PointerType *PType = dyn_cast<PointerType>(Arg->getType());
+  // Callers must pass a pointer-typed argument; anything else is a bug.
+  assert(PType && "Expecting pointer type in handleParam");
+
+  const Type *StructType = PType->getElementType();
+  // Every instruction created below is inserted before FirstInst.
+  AllocaInst *AllocA =
+    new AllocaInst((Type *)StructType, Arg->getName(), FirstInst);
+
+  /* Give the alloca the alignment of the byval parameter: later loads/stores
+   * assume that alignment, and every use of the parameter is about to be
+   * replaced by this alloca. (+1 presumably skips the return-value slot.)
+   */
+  AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
+  // Redirect existing uses now, so the bitcast of Arg created below is kept.
+  Arg->replaceAllUsesWith(AllocA);
+
+  // Get the ptr.gen.to.param intrinsic (i8 generic pointer -> i8 param pointer)
+  const Type *CvtTypes[2] = {
+    Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),
+    Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_GENERIC)
+  };
+  Function *CvtFunc = (Function *)Intrinsic::getDeclaration(
+      Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param,
+      ArrayRef<Type *>((Type **)CvtTypes, 2));
+  std::vector<Value *> BC1;
+  BC1.push_back(
+      new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),
+                                              ADDRESS_SPACE_GENERIC),
+                      Arg->getName(), FirstInst));
+  CallInst *CallCVT = CallInst::Create(CvtFunc, ArrayRef<Value *>(BC1),
+                                       "cvt_to_param", FirstInst);
+  // Retype the param-space pointer, then copy the whole value into the alloca.
+  BitCastInst *BitCast = new BitCastInst(
+      CallCVT, PointerType::get((Type *)StructType, ADDRESS_SPACE_PARAM),
+      Arg->getName(), FirstInst);
+  LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
+  new StoreInst(LI, AllocA, FirstInst);
+}
+
+/// =============================================================================
+/// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then
+/// add the following instructions to the first basic block :
+///
+/// %temp = alloca %struct.x, align 8
+/// %tt1 = bitcast %struct.x * %d to i8 *
+/// %tt2 = llvm.nvvm.ptr.gen.to.param %tt1
+/// %tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) *
+/// %tv = load %struct.x addrspace(101) * %tempd
+/// store %struct.x %tv, %struct.x * %temp, align 8
+///
+/// The above code allocates some space on the stack and copies the incoming
+/// struct from param space to local space.
+/// Then replace all occurrences of %d by %temp.
+/// =============================================================================
+void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {
+  const AttributeSet &PAL = F.getAttributes();
+
+  unsigned Idx = 1; // attribute index of the current argument; starts at 1
+
+  for (Argument &Arg : F.args()) {
+    const Type *Ty = Arg.getType();
+
+    const PointerType *PTy = dyn_cast<PointerType>(Ty);
+
+    if (PTy) {
+      if (PAL.hasAttribute(Idx, Attribute::ByVal)) {
+        // Pointer argument marked byval: copy it to a local alloca.
+        handleParam(&Arg);
+      }
+    }
+    Idx++;
+  }
+}
+
+/// =============================================================================
+/// Main function for this pass. Returns false for non-kernels, true otherwise.
+/// =============================================================================
+bool NVPTXLowerStructArgs::runOnFunction(Function &F) {
+  // Skip non-kernels. See the comments at the top of this file.
+  if (!isKernelFunction(F))
+    return false;
+
+  handleStructPtrArgs(F);
+  // NOTE(review): reports a change even if no byval argument was rewritten.
+  return true;
+}
+// Factory declared in NVPTX.h; returns a newly allocated pass instance.
+FunctionPass *llvm::createNVPTXLowerStructArgsPass() {
+  return new NVPTXLowerStructArgs();
+}

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=221377&r1=221376&r2=221377&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp Wed Nov  5 12:19:30 2014
@@ -50,6 +50,7 @@ void initializeNVVMReflectPass(PassRegis
 void initializeGenericToNVVMPass(PassRegistry&);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);
 }
 
 extern "C" void LLVMInitializeNVPTXTarget() {
@@ -64,6 +65,7 @@ extern "C" void LLVMInitializeNVPTXTarge
   initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
   initializeNVPTXFavorNonGenericAddrSpacesPass(
     *PassRegistry::getPassRegistry());
+  initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
 }
 
 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,

Added: llvm/trunk/test/CodeGen/NVPTX/bug21465.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug21465.ll?rev=221377&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug21465.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/bug21465.ll Wed Nov  5 12:19:30 2014
@@ -0,0 +1,24 @@
+; RUN: opt < %s -nvptx-lower-struct-args -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+%struct.S = type { i32, i32 }
+; Kernel with a byval struct pointer; its copy must go through ptr.gen.to.param.
+; Function Attrs: nounwind
+define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly %input, i32* nocapture %output) #0 {
+entry:
+; CHECK-LABEL: @_Z11TakesStruct1SPi
+; CHECK:   bitcast %struct.S* %input to i8*
+; CHECK:   call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8
+  %b = getelementptr inbounds %struct.S* %input, i64 0, i32 1
+  %0 = load i32* %b, align 4
+  store i32 %0, i32* %output, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!nvvm.annotations = !{!0}
+
+!0 = metadata !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, metadata !"kernel", i32 1}





More information about the llvm-commits mailing list