[llvm] r276153 - [NVPTX] Improve lowering of byval args of device functions.

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 20 11:39:47 PDT 2016


Author: tra
Date: Wed Jul 20 13:39:47 2016
New Revision: 276153

URL: http://llvm.org/viewvc/llvm-project?rev=276153&view=rev
Log:
[NVPTX] Improve lowering of byval args of device functions.

Avoid unnecessary spills of byval arguments of device functions to
local space on SASS level and subsequent pointer conversion to generic
address space that follows. Instead, make a local copy in IR, provide
a way to access arguments directly, and let LLVM optimize the copy away
when possible.

Differential Review: https://reviews.llvm.org/D21421

Modified:
    llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
    llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
    llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
    llvm/trunk/test/CodeGen/NVPTX/bug21465.ll
    llvm/trunk/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp?rev=276153&r1=276152&r2=276153&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Wed Jul 20 13:39:47 2016
@@ -5076,11 +5076,12 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr
     Address = N.getOperand(0);
     return true;
   }
-  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
-    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
-    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
-      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
-        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
+  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
+  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
+    if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
+        CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
+        CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
+      return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
   }
   return false;
 }

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=276153&r1=276152&r2=276153&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Wed Jul 20 13:39:47 2016
@@ -2077,7 +2077,6 @@ SDValue NVPTXTargetLowering::LowerFormal
   SDValue Root = DAG.getRoot();
   std::vector<SDValue> OutChains;
 
-  bool isKernel = llvm::isKernelFunction(*F);
   bool isABI = (STI.getSmVersion() >= 20);
   assert(isABI && "Non-ABI compilation is not supported");
   if (!isABI)
@@ -2111,7 +2110,8 @@ SDValue NVPTXTargetLowering::LowerFormal
             theArgs[i],
             (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
                                      : nullptr))) {
-      assert(isKernel && "Only kernels can have image/sampler params");
+      assert(llvm::isKernelFunction(*F) &&
+             "Only kernels can have image/sampler params");
       InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
       continue;
     }
@@ -2336,14 +2336,7 @@ SDValue NVPTXTargetLowering::LowerFormal
     SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
     if (p.getNode())
       p.getNode()->setIROrder(idx + 1);
-    if (isKernel)
-      InVals.push_back(p);
-    else {
-      SDValue p2 = DAG.getNode(
-          ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
-          DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
-      InVals.push_back(p2);
-    }
+    InVals.push_back(p);
   }
 
   // Clang will check explicit VarArg and issue error if any. However, Clang

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp?rev=276153&r1=276152&r2=276153&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp Wed Jul 20 13:39:47 2016
@@ -7,20 +7,28 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Pointer arguments to kernel functions need to be lowered specially.
 //
-// 1. Copy byval struct args to local memory. This is a preparation for handling
-//    cases like
+// Arguments to kernel and device functions are passed via param space,
+// which imposes certain restrictions:
+// http://docs.nvidia.com/cuda/parallel-thread-execution/#state-spaces
+//
+// Kernel parameters are read-only and accessible only via ld.param
+// instruction, directly or via a pointer. Pointers to kernel
+// arguments can't be converted to generic address space.
+//
+// Device function parameters are directly accessible via
+// ld.param/st.param, but taking the address of one returns a pointer
+// to a copy created in local space which *can't* be used with
+// ld.param/st.param.
+//
+// Copying a byval struct into local memory in IR allows us to enforce
+// the param space restrictions, gives the rest of IR a pointer w/o
+// param space restrictions, and gives us an opportunity to eliminate
+// the copy.
 //
-//    kernel void foo(struct A arg, ...)
-//    {
-//      struct A *p = &arg;
-//      ...
-//      ... = p->filed1 ...  (this is no generic address for .param)
-//      p->filed2 = ...      (this is no write access to .param)
-//    }
+// Pointer arguments to kernel functions need more work to be lowered:
 //
-// 2. Convert non-byval pointer arguments of CUDA kernels to pointers in the
+// 1. Convert non-byval pointer arguments of CUDA kernels to pointers in the
 //    global address space. This allows later optimizations to emit
 //    ld.global.*/st.global.* for accessing these pointer arguments. For
 //    example,
@@ -47,7 +55,7 @@
 //      ...
 //    }
 //
-// 3. Convert pointers in a byval kernel parameter to pointers in the global
+// 2. Convert pointers in a byval kernel parameter to pointers in the global
 //    address space. As #2, it allows NVPTX to emit more ld/st.global. E.g.,
 //
 //    struct S {
@@ -101,6 +109,9 @@ namespace {
 class NVPTXLowerKernelArgs : public FunctionPass {
   bool runOnFunction(Function &F) override;
 
+  bool runOnKernelFunction(Function &F);
+  bool runOnDeviceFunction(Function &F);
+
   // handle byval parameters
   void handleByValParam(Argument *Arg);
   // Knowing Ptr must point to the global address space, this function
@@ -192,11 +203,7 @@ void NVPTXLowerKernelArgs::markPointerAs
 // =============================================================================
 // Main function for this pass.
 // =============================================================================
-bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
-  // Skip non-kernels. See the comments at the top of this file.
-  if (!isKernelFunction(F))
-    return false;
-
+bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
   if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
     // Mark pointers in byval structs as global.
     for (auto &B : F) {
@@ -228,6 +235,18 @@ bool NVPTXLowerKernelArgs::runOnFunction
   return true;
 }
 
+// Device functions only need to copy byval args into local memory.
+bool NVPTXLowerKernelArgs::runOnDeviceFunction(Function &F) {
+  for (Argument &Arg : F.args())
+    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
+      handleByValParam(&Arg);
+  return true;
+}
+
+bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
+  return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
+}
+
 FunctionPass *
 llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
   return new NVPTXLowerKernelArgs(TM);

Modified: llvm/trunk/test/CodeGen/NVPTX/bug21465.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug21465.ll?rev=276153&r1=276152&r2=276153&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug21465.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/bug21465.ll Wed Jul 20 13:39:47 2016
@@ -15,7 +15,7 @@ entry:
   %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
   %0 = load i32, i32* %b, align 4
 ; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}]
-; PTX: ld.param.u32 [[value:%r[0-9]+]], [{{%rd[0-9]+}}+4]
+; PTX: ld.param.u32 [[value:%r[0-9]+]], [_Z11TakesStruct1SPi_param_0+4]
   store i32 %0, i32* %output, align 4
 ; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]]
   ret void

Modified: llvm/trunk/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll?rev=276153&r1=276152&r2=276153&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll Wed Jul 20 13:39:47 2016
@@ -28,20 +28,38 @@ define void @kernel2(float addrspace(1)*
 
 %struct.S = type { i32*, i32* }
 
-define void @ptr_in_byval(%struct.S* byval %input, i32* %output) {
-; CHECK-LABEL: .visible .entry ptr_in_byval(
-; CHECK: cvta.to.global.u64
-; CHECK: cvta.to.global.u64
+define void @ptr_in_byval_kernel(%struct.S* byval %input, i32* %output) {
+; CHECK-LABEL: .visible .entry ptr_in_byval_kernel(
+; CHECK: ld.param.u64 	%[[optr:rd.*]], [ptr_in_byval_kernel_param_1]
+; CHECK: cvta.to.global.u64 %[[optr_g:.*]], %[[optr]];
+; CHECK: ld.param.u64 	%[[iptr:rd.*]], [ptr_in_byval_kernel_param_0+8]
+; CHECK: cvta.to.global.u64 %[[iptr_g:.*]], %[[iptr]];
   %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
   %b = load i32*, i32** %b_ptr, align 4
   %v = load i32, i32* %b, align 4
-; CHECK: ld.global.u32
+; CHECK: ld.global.u32 %[[val:.*]], [%[[iptr_g]]]
   store i32 %v, i32* %output, align 4
-; CHECK: st.global.u32
+; CHECK: st.global.u32 [%[[optr_g]]], %[[val]]
+  ret void
+}
+
+; Regular functions lower byval arguments differently. We need to make
+; sure that we're loading byval argument data using [symbol+offset].
+; There's also no assumption that all pointers within are in global space.
+define void @ptr_in_byval_func(%struct.S* byval %input, i32* %output) {
+; CHECK-LABEL: .visible .func ptr_in_byval_func(
+; CHECK: ld.param.u64 	%[[optr:rd.*]], [ptr_in_byval_func_param_1]
+; CHECK: ld.param.u64 	%[[iptr:rd.*]], [ptr_in_byval_func_param_0+8]
+  %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
+  %b = load i32*, i32** %b_ptr, align 4
+  %v = load i32, i32* %b, align 4
+; CHECK: ld.u32 %[[val:.*]], [%[[iptr]]]
+  store i32 %v, i32* %output, align 4
+; CHECK: st.u32 [%[[optr]]], %[[val]]
   ret void
 }
 
 !nvvm.annotations = !{!0, !1, !2}
 !0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
 !1 = !{void (float addrspace(1)*, float addrspace(1)*)* @kernel2, !"kernel", i32 1}
-!2 = !{void (%struct.S*, i32*)* @ptr_in_byval, !"kernel", i32 1}
+!2 = !{void (%struct.S*, i32*)* @ptr_in_byval_kernel, !"kernel", i32 1}




More information about the llvm-commits mailing list