[PATCH] D50207: [NVPTX] Handle __nvvm_reflect("__CUDA_ARCH").
Artem Belevich via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 3 11:05:58 PDT 2018
This revision was automatically updated to reflect the committed changes.
Closed by commit rL338908: [NVPTX] Handle __nvvm_reflect("__CUDA_ARCH"). (authored by tra).
Changed prior to commit:
https://reviews.llvm.org/D50207?vs=158869&id=159056#toc
Repository:
rL LLVM
https://reviews.llvm.org/D50207
Files:
llvm/trunk/lib/Target/NVPTX/NVPTX.h
llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
Index: llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
+++ llvm/trunk/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
@@ -0,0 +1,21 @@
+; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
+; Verify that __nvvm_reflect() is replaced with an appropriate value.
+;
+; RUN: opt %s -S -nvvm-reflect -O2 -mtriple=nvptx64 \
+; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
+; RUN: opt %s -S -nvvm-reflect -O2 -mtriple=nvptx64 -mcpu=sm_35 \
+; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
+
+@"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
+
+declare i32 @__nvvm_reflect(i8*)
+
+; COMMON-LABEL: @foo
+define i32 @foo(float %a, float %b) {
+; COMMON-NOT: call i32 @__nvvm_reflect
+ %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([12 x i8], [12 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+; SM20: ret i32 200
+; SM35: ret i32 350
+ ret i32 %reflect
+}
+
Index: llvm/trunk/lib/Target/NVPTX/NVPTX.h
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTX.h
+++ llvm/trunk/lib/Target/NVPTX/NVPTX.h
@@ -46,7 +46,7 @@
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
-FunctionPass *createNVVMReflectPass();
+FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
Index: llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
+++ llvm/trunk/lib/Target/NVPTX/NVVMReflect.cpp
@@ -50,15 +50,19 @@
class NVVMReflect : public FunctionPass {
public:
static char ID;
- NVVMReflect() : FunctionPass(ID) {
+ unsigned int SmVersion;
+ NVVMReflect() : NVVMReflect(0) {}
+ explicit NVVMReflect(unsigned int Sm) : FunctionPass(ID), SmVersion(Sm) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &) override;
};
}
-FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
+FunctionPass *llvm::createNVVMReflectPass(unsigned int SmVersion) {
+ return new NVVMReflect(SmVersion);
+}
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@@ -163,6 +167,8 @@
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
ReflectVal = Flag->getSExtValue();
+ } else if (ReflectArg == "__CUDA_ARCH") {
+ ReflectVal = SmVersion * 10;
}
Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
ToRemove.push_back(Call);
Index: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -195,7 +195,7 @@
Builder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createNVVMReflectPass());
+ PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
});
}
@@ -258,7 +258,8 @@
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
// call addEarlyAsPossiblePasses.
- addPass(createNVVMReflectPass());
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
+ addPass(createNVVMReflectPass(ST.getSmVersion()));
if (getOptLevel() != CodeGenOpt::None)
addPass(createNVPTXImageOptimizerPass());
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D50207.159056.patch
Type: text/x-patch
Size: 4051 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180803/fb82bdb6/attachment.bin>
More information about the llvm-commits mailing list