[PATCH] D21421: [NVPTX] Improve lowering of byval args of device functions.

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 15 16:38:11 PDT 2016


tra created this revision.
tra added reviewers: jholewinski, jlebar, jingyue.
tra added a subscriber: llvm-commits.
Herald added a subscriber: jholewinski.

Lower byval arguments of device functions the same way
we lower them for kernels, and ensure that they can be accessed
via the argument's symbol.

This allows loading the value of the argument using [symbol+offset]
instead of converting the argument to a generic-space pointer and using it
for indexing (which also implicitly converts the param-space pointer to a
local-space one at the SASS level and triggers copying of the argument into
local space in the process).

This reduces call overhead, uses fewer registers, and reduces overall
SASS size by 2-4% on thrust tests.



http://reviews.llvm.org/D21421

Files:
  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
  lib/Target/NVPTX/NVPTXISelLowering.cpp
  lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
  test/CodeGen/NVPTX/bug21465.ll

Index: test/CodeGen/NVPTX/bug21465.ll
===================================================================
--- test/CodeGen/NVPTX/bug21465.ll
+++ test/CodeGen/NVPTX/bug21465.ll
@@ -15,7 +15,7 @@
   %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
   %0 = load i32, i32* %b, align 4
 ; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}]
-; PTX: ld.param.u32 [[value:%r[0-9]+]], [{{%rd[0-9]+}}+4]
+; PTX: ld.param.u32 [[value:%r[0-9]+]], [_Z11TakesStruct1SPi_param_0+4]
   store i32 %0, i32* %output, align 4
 ; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]]
   ret void
Index: lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -101,6 +101,11 @@
 class NVPTXLowerKernelArgs : public FunctionPass {
   bool runOnFunction(Function &F) override;
 
+  // Kernels and regular device functions treat byval arguments
+  // differently.
+  bool runOnKernelFunction(Function &F);
+  bool runOnDeviceFunction(Function &F);
+
   // handle byval parameters
   void handleByValParam(Argument *Arg);
   // Knowing Ptr must point to the global address space, this function
@@ -192,11 +197,7 @@
 // =============================================================================
 // Main function for this pass.
 // =============================================================================
-bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
-  // Skip non-kernels. See the comments at the top of this file.
-  if (!isKernelFunction(F))
-    return false;
-
+bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
   if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
     // Mark pointers in byval structs as global.
     for (auto &B : F) {
@@ -228,6 +229,17 @@
   return true;
 }
 
+bool NVPTXLowerKernelArgs::runOnDeviceFunction(Function &F) {
+  for (Argument &Arg : F.args())
+    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
+      handleByValParam(&Arg);
+  return true;
+}
+
+bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
+  return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
+}
+
 FunctionPass *
 llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
   return new NVPTXLowerKernelArgs(TM);
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2341,14 +2341,7 @@
     SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
     if (p.getNode())
       p.getNode()->setIROrder(idx + 1);
-    if (isKernel)
-      InVals.push_back(p);
-    else {
-      SDValue p2 = DAG.getNode(
-          ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
-          DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
-      InVals.push_back(p2);
-    }
+    InVals.push_back(p);
   }
 
   // Clang will check explicit VarArg and issue error if any. However, Clang
Index: lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -662,6 +662,11 @@
           TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
       break;
     case ADDRESS_SPACE_PARAM:
+      if (Src.getOpcode() == NVPTXISD::MoveParam) {
+        // addrspacecast MoveParam to param space is a no-op.
+        ReplaceNode(N, Src.getOperand(0).getNode());
+        return;
+      }
       Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
                          : NVPTX::nvvm_ptr_gen_to_param;
       break;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D21421.60899.patch
Type: text/x-patch
Size: 3741 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160615/eacad25f/attachment.bin>


More information about the llvm-commits mailing list