[llvm] r276189 - [NVPTX] Renamed NVPTXLowerKernelArgs -> NVPTXLowerArgs. NFC.
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 20 14:44:08 PDT 2016
Author: tra
Date: Wed Jul 20 16:44:07 2016
New Revision: 276189
URL: http://llvm.org/viewvc/llvm-project?rev=276189&view=rev
Log:
[NVPTX] Renamed NVPTXLowerKernelArgs -> NVPTXLowerArgs. NFC.
After r276153 the pass applies to both kernels and regular functions.
Differential Revision: https://reviews.llvm.org/D22583
Added:
llvm/trunk/lib/Target/NVPTX/NVPTXLowerArgs.cpp
- copied, changed from r276154, llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
Removed:
llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
Modified:
llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
llvm/trunk/lib/Target/NVPTX/NVPTX.h
llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
llvm/trunk/test/CodeGen/NVPTX/bug21465.ll
Modified: llvm/trunk/lib/Target/NVPTX/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/CMakeLists.txt?rev=276189&r1=276188&r2=276189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/NVPTX/CMakeLists.txt Wed Jul 20 16:44:07 2016
@@ -21,7 +21,7 @@ set(NVPTXCodeGen_sources
NVPTXInferAddressSpaces.cpp
NVPTXInstrInfo.cpp
NVPTXLowerAggrCopies.cpp
- NVPTXLowerKernelArgs.cpp
+ NVPTXLowerArgs.cpp
NVPTXLowerAlloca.cpp
NVPTXPeephole.cpp
NVPTXMCExpr.cpp
Modified: llvm/trunk/lib/Target/NVPTX/NVPTX.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTX.h?rev=276189&r1=276188&r2=276189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTX.h (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTX.h Wed Jul 20 16:44:07 2016
@@ -53,7 +53,7 @@ FunctionPass *createNVVMReflectPass(cons
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
-FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
+FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
BasicBlockPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
Copied: llvm/trunk/lib/Target/NVPTX/NVPTXLowerArgs.cpp (from r276154, llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerArgs.cpp?p2=llvm/trunk/lib/Target/NVPTX/NVPTXLowerArgs.cpp&p1=llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp&r1=276154&r2=276189&rev=276189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerArgs.cpp Wed Jul 20 16:44:07 2016
@@ -1,4 +1,4 @@
-//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===//
+//===-- NVPTXLowerArgs.cpp - Lower arguments ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -102,11 +102,11 @@
using namespace llvm;
namespace llvm {
-void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+void initializeNVPTXLowerArgsPass(PassRegistry &);
}
namespace {
-class NVPTXLowerKernelArgs : public FunctionPass {
+class NVPTXLowerArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
bool runOnKernelFunction(Function &F);
@@ -122,7 +122,7 @@ class NVPTXLowerKernelArgs : public Func
public:
static char ID; // Pass identification, replacement for typeid
- NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr)
+ NVPTXLowerArgs(const NVPTXTargetMachine *TM = nullptr)
: FunctionPass(ID), TM(TM) {}
const char *getPassName() const override {
return "Lower pointer arguments of CUDA kernels";
@@ -133,10 +133,10 @@ private:
};
} // namespace
-char NVPTXLowerKernelArgs::ID = 1;
+char NVPTXLowerArgs::ID = 1;
-INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
- "Lower kernel arguments (NVPTX)", false, false)
+INITIALIZE_PASS(NVPTXLowerArgs, "nvptx-lower-args",
+ "Lower arguments (NVPTX)", false, false)
// =============================================================================
// If the function had a byval struct ptr arg, say foo(%struct.x* byval %d),
@@ -151,7 +151,7 @@ INITIALIZE_PASS(NVPTXLowerKernelArgs, "n
// struct from param space to local space.
// Then replace all occurrences of %d by %temp.
// =============================================================================
-void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
+void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
Function *Func = Arg->getParent();
Instruction *FirstInst = &(Func->getEntryBlock().front());
PointerType *PType = dyn_cast<PointerType>(Arg->getType());
@@ -173,7 +173,7 @@ void NVPTXLowerKernelArgs::handleByValPa
new StoreInst(LI, AllocA, FirstInst);
}
-void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
+void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
return;
@@ -203,7 +203,7 @@ void NVPTXLowerKernelArgs::markPointerAs
// =============================================================================
// Main function for this pass.
// =============================================================================
-bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
+bool NVPTXLowerArgs::runOnKernelFunction(Function &F) {
if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
// Mark pointers in byval structs as global.
for (auto &B : F) {
@@ -236,18 +236,18 @@ bool NVPTXLowerKernelArgs::runOnKernelFu
}
// Device functions only need to copy byval args into local memory.
-bool NVPTXLowerKernelArgs::runOnDeviceFunction(Function &F) {
+bool NVPTXLowerArgs::runOnDeviceFunction(Function &F) {
for (Argument &Arg : F.args())
if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
handleByValParam(&Arg);
return true;
}
-bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
+bool NVPTXLowerArgs::runOnFunction(Function &F) {
return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
}
FunctionPass *
-llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
- return new NVPTXLowerKernelArgs(TM);
+llvm::createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM) {
+ return new NVPTXLowerArgs(TM);
}
Removed: llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp?rev=276188&view=auto
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp (removed)
@@ -1,253 +0,0 @@
-//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-// Arguments to kernel and device functions are passed via param space,
-// which imposes certain restrictions:
-// http://docs.nvidia.com/cuda/parallel-thread-execution/#state-spaces
-//
-// Kernel parameters are read-only and accessible only via ld.param
-// instruction, directly or via a pointer. Pointers to kernel
-// arguments can't be converted to generic address space.
-//
-// Device function parameters are directly accessible via
-// ld.param/st.param, but taking the address of one returns a pointer
-// to a copy created in local space which *can't* be used with
-// ld.param/st.param.
-//
-// Copying a byval struct into local memory in IR allows us to enforce
-// the param space restrictions, gives the rest of IR a pointer w/o
-// param space restrictions, and gives us an opportunity to eliminate
-// the copy.
-//
-// Pointer arguments to kernel functions need more work to be lowered:
-//
-// 1. Convert non-byval pointer arguments of CUDA kernels to pointers in the
-// global address space. This allows later optimizations to emit
-// ld.global.*/st.global.* for accessing these pointer arguments. For
-// example,
-//
-// define void @foo(float* %input) {
-// %v = load float, float* %input, align 4
-// ...
-// }
-//
-// becomes
-//
-// define void @foo(float* %input) {
-// %input2 = addrspacecast float* %input to float addrspace(1)*
-// %input3 = addrspacecast float addrspace(1)* %input2 to float*
-// %v = load float, float* %input3, align 4
-// ...
-// }
-//
-// Later, NVPTXFavorNonGenericAddrSpaces will optimize it to
-//
-// define void @foo(float* %input) {
-// %input2 = addrspacecast float* %input to float addrspace(1)*
-// %v = load float, float addrspace(1)* %input2, align 4
-// ...
-// }
-//
-// 2. Convert pointers in a byval kernel parameter to pointers in the global
-// address space. As #2, it allows NVPTX to emit more ld/st.global. E.g.,
-//
-// struct S {
-// int *x;
-// int *y;
-// };
-// __global__ void foo(S s) {
-// int *b = s.y;
-// // use b
-// }
-//
-// "b" points to the global address space. In the IR level,
-//
-// define void @foo({i32*, i32*}* byval %input) {
-// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
-// %b = load i32*, i32** %b_ptr
-// ; use %b
-// }
-//
-// becomes
-//
-// define void @foo({i32*, i32*}* byval %input) {
-// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
-// %b = load i32*, i32** %b_ptr
-// %b_global = addrspacecast i32* %b to i32 addrspace(1)*
-// %b_generic = addrspacecast i32 addrspace(1)* %b_global to i32*
-// ; use %b_generic
-// }
-//
-// TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes
-// don't cancel the addrspacecast pair this pass emits.
-//===----------------------------------------------------------------------===//
-
-#include "NVPTX.h"
-#include "NVPTXUtilities.h"
-#include "NVPTXTargetMachine.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-
-using namespace llvm;
-
-namespace llvm {
-void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
-}
-
-namespace {
-class NVPTXLowerKernelArgs : public FunctionPass {
- bool runOnFunction(Function &F) override;
-
- bool runOnKernelFunction(Function &F);
- bool runOnDeviceFunction(Function &F);
-
- // handle byval parameters
- void handleByValParam(Argument *Arg);
- // Knowing Ptr must point to the global address space, this function
- // addrspacecasts Ptr to global and then back to generic. This allows
- // NVPTXFavorNonGenericAddrSpace to fold the global-to-generic cast into
- // loads/stores that appear later.
- void markPointerAsGlobal(Value *Ptr);
-
-public:
- static char ID; // Pass identification, replacement for typeid
- NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {}
- const char *getPassName() const override {
- return "Lower pointer arguments of CUDA kernels";
- }
-
-private:
- const NVPTXTargetMachine *TM;
-};
-} // namespace
-
-char NVPTXLowerKernelArgs::ID = 1;
-
-INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
- "Lower kernel arguments (NVPTX)", false, false)
-
-// =============================================================================
-// If the function had a byval struct ptr arg, say foo(%struct.x* byval %d),
-// then add the following instructions to the first basic block:
-//
-// %temp = alloca %struct.x, align 8
-// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)*
-// %tv = load %struct.x addrspace(101)* %tempd
-// store %struct.x %tv, %struct.x* %temp, align 8
-//
-// The above code allocates some space in the stack and copies the incoming
-// struct from param space to local space.
-// Then replace all occurrences of %d by %temp.
-// =============================================================================
-void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
- Function *Func = Arg->getParent();
- Instruction *FirstInst = &(Func->getEntryBlock().front());
- PointerType *PType = dyn_cast<PointerType>(Arg->getType());
-
- assert(PType && "Expecting pointer type in handleByValParam");
-
- Type *StructType = PType->getElementType();
- AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
- // Set the alignment to alignment of the byval parameter. This is because,
- // later load/stores assume that alignment, and we are going to replace
- // the use of the byval parameter with this alloca instruction.
- AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
- Arg->replaceAllUsesWith(AllocA);
-
- Value *ArgInParam = new AddrSpaceCastInst(
- Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
- FirstInst);
- LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst);
- new StoreInst(LI, AllocA, FirstInst);
-}
-
-void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
- if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
- return;
-
- // Deciding where to emit the addrspacecast pair.
- BasicBlock::iterator InsertPt;
- if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
- // Insert at the functon entry if Ptr is an argument.
- InsertPt = Arg->getParent()->getEntryBlock().begin();
- } else {
- // Insert right after Ptr if Ptr is an instruction.
- InsertPt = ++cast<Instruction>(Ptr)->getIterator();
- assert(InsertPt != InsertPt->getParent()->end() &&
- "We don't call this function with Ptr being a terminator.");
- }
-
- Instruction *PtrInGlobal = new AddrSpaceCastInst(
- Ptr, PointerType::get(Ptr->getType()->getPointerElementType(),
- ADDRESS_SPACE_GLOBAL),
- Ptr->getName(), &*InsertPt);
- Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(),
- Ptr->getName(), &*InsertPt);
- // Replace with PtrInGeneric all uses of Ptr except PtrInGlobal.
- Ptr->replaceAllUsesWith(PtrInGeneric);
- PtrInGlobal->setOperand(0, Ptr);
-}
-
-// =============================================================================
-// Main function for this pass.
-// =============================================================================
-bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
- if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
- // Mark pointers in byval structs as global.
- for (auto &B : F) {
- for (auto &I : B) {
- if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- if (LI->getType()->isPointerTy()) {
- Value *UO = GetUnderlyingObject(LI->getPointerOperand(),
- F.getParent()->getDataLayout());
- if (Argument *Arg = dyn_cast<Argument>(UO)) {
- if (Arg->hasByValAttr()) {
- // LI is a load from a pointer within a byval kernel parameter.
- markPointerAsGlobal(LI);
- }
- }
- }
- }
- }
- }
- }
-
- for (Argument &Arg : F.args()) {
- if (Arg.getType()->isPointerTy()) {
- if (Arg.hasByValAttr())
- handleByValParam(&Arg);
- else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
- markPointerAsGlobal(&Arg);
- }
- }
- return true;
-}
-
-// Device functions only need to copy byval args into local memory.
-bool NVPTXLowerKernelArgs::runOnDeviceFunction(Function &F) {
- for (Argument &Arg : F.args())
- if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
- handleByValParam(&Arg);
- return true;
-}
-
-bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
- return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
-}
-
-FunctionPass *
-llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
- return new NVPTXLowerKernelArgs(TM);
-}
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=276189&r1=276188&r2=276189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp Wed Jul 20 16:44:07 2016
@@ -63,7 +63,7 @@ void initializeNVPTXAssignValidGlobalNam
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
-void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
@@ -82,7 +82,7 @@ extern "C" void LLVMInitializeNVPTXTarge
initializeNVPTXAssignValidGlobalNamesPass(PR);
initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
initializeNVPTXInferAddressSpacesPass(PR);
- initializeNVPTXLowerKernelArgsPass(PR);
+ initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
}
@@ -195,7 +195,7 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPa
}
void NVPTXPassConfig::addAddressSpaceInferencePasses() {
- // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
+ // NVPTXLowerArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
addPass(createSROAPass());
addPass(createNVPTXLowerAllocaPass());
@@ -253,9 +253,9 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
- // NVPTXLowerKernelArgs is required for correctness and should be run right
+ // NVPTXLowerArgs is required for correctness and should be run right
// before the address space inference passes.
- addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
+ addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
if (getOptLevel() != CodeGenOpt::None) {
addAddressSpaceInferencePasses();
addStraightLineScalarOptimizationPasses();
Modified: llvm/trunk/test/CodeGen/NVPTX/bug21465.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug21465.ll?rev=276189&r1=276188&r2=276189&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug21465.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/bug21465.ll Wed Jul 20 16:44:07 2016
@@ -1,4 +1,4 @@
-; RUN: opt < %s -nvptx-lower-kernel-args -S | FileCheck %s
+; RUN: opt < %s -nvptx-lower-args -S | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
More information about the llvm-commits
mailing list