[PATCH] D21421: [NVPTX] Improve lowering of byval args of device functions.
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 15 16:38:11 PDT 2016
tra created this revision.
tra added reviewers: jholewinski, jlebar, jingyue.
tra added a subscriber: llvm-commits.
Herald added a subscriber: jholewinski.
Lower byval arguments of device functions the same way
we lower them for kernels and ensure that they can be accessed
via the argument's symbol.
This allows loading the value of the argument using [symbol+offset]
instead of converting the argument to a generic space pointer and using it
for indexing (which also implicitly converts the param space pointer to
a local space one at the SASS level and triggers copying of the argument into
local space in the process).
This reduces call overhead, uses fewer registers, and reduces overall
SASS size by 2-4% on thrust tests.
http://reviews.llvm.org/D21421
Files:
lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
lib/Target/NVPTX/NVPTXISelLowering.cpp
lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
test/CodeGen/NVPTX/bug21465.ll
Index: test/CodeGen/NVPTX/bug21465.ll
===================================================================
--- test/CodeGen/NVPTX/bug21465.ll
+++ test/CodeGen/NVPTX/bug21465.ll
@@ -15,7 +15,7 @@
%b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
%0 = load i32, i32* %b, align 4
; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}]
-; PTX: ld.param.u32 [[value:%r[0-9]+]], [{{%rd[0-9]+}}+4]
+; PTX: ld.param.u32 [[value:%r[0-9]+]], [_Z11TakesStruct1SPi_param_0+4]
store i32 %0, i32* %output, align 4
; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]]
ret void
Index: lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -101,6 +101,11 @@
class NVPTXLowerKernelArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
+ // Kernels and regular device functions treat byval arguments
+ // differently.
+ bool runOnKernelFunction(Function &F);
+ bool runOnDeviceFunction(Function &F);
+
// handle byval parameters
void handleByValParam(Argument *Arg);
// Knowing Ptr must point to the global address space, this function
@@ -192,11 +197,7 @@
// =============================================================================
// Main function for this pass.
// =============================================================================
-bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
- // Skip non-kernels. See the comments at the top of this file.
- if (!isKernelFunction(F))
- return false;
-
+bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
// Mark pointers in byval structs as global.
for (auto &B : F) {
@@ -228,6 +229,17 @@
return true;
}
+bool NVPTXLowerKernelArgs::runOnDeviceFunction(Function &F) {
+ for (Argument &Arg : F.args())
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
+ handleByValParam(&Arg);
+ return true;
+}
+
+bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
+ return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
+}
+
FunctionPass *
llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
return new NVPTXLowerKernelArgs(TM);
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2341,14 +2341,7 @@
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
- if (isKernel)
- InVals.push_back(p);
- else {
- SDValue p2 = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
- DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
- InVals.push_back(p2);
- }
+ InVals.push_back(p);
}
// Clang will check explicit VarArg and issue error if any. However, Clang
Index: lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -662,6 +662,11 @@
TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
break;
case ADDRESS_SPACE_PARAM:
+ if (Src.getOpcode() == NVPTXISD::MoveParam) {
+ // addrspacecast MoveParam to param space is a no-op.
+ ReplaceNode(N, Src.getOperand(0).getNode());
+ return;
+ }
Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
: NVPTX::nvvm_ptr_gen_to_param;
break;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D21421.60899.patch
Type: text/x-patch
Size: 3741 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160615/eacad25f/attachment.bin>
More information about the llvm-commits
mailing list