[Parallel_libs-commits] [PATCH] D24531: [SE] KernelSpec return best PTX
Jason Henline via Parallel_libs-commits
parallel_libs-commits at lists.llvm.org
Tue Sep 13 16:37:57 PDT 2016
This revision was automatically updated to reflect the committed changes.
Closed by commit rL281417: [SE] KernelSpec return best PTX (authored by jhen).
Changed prior to commit:
https://reviews.llvm.org/D24531?vs=71258&id=71264#toc
Repository:
rL LLVM
https://reviews.llvm.org/D24531
Files:
parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
Index: parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
===================================================================
--- parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
+++ parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
@@ -31,12 +31,13 @@
const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
int ComputeCapabilityMinor) const {
- auto PTXIter =
- PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
+ auto Iterator =
+ PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{
ComputeCapabilityMajor, ComputeCapabilityMinor});
- if (PTXIter == PTXByComputeCapability.end())
+ if (Iterator == PTXByComputeCapability.begin())
return nullptr;
- return PTXIter->second;
+ --Iterator;
+ return Iterator->second;
}
CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
Index: parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
===================================================================
--- parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
+++ parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
@@ -121,12 +121,11 @@
llvm::StringRef KernelName,
const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
- /// Returns a pointer to the PTX code for the requested compute capability.
+ /// Returns a pointer to the PTX code for the greatest compute capability not
+ /// exceeding the requested compute capability.
///
- /// Returns nullptr on failed lookup (if the requested compute capability is
- /// not available). Matches exactly the specified compute capability. Doesn't
- /// try to do anything smart like finding the next best compute capability if
- /// the specified capability cannot be found.
+ /// Returns nullptr on failed lookup (if the requested version is not
+ /// available and no lower versions are available).
const char *getCode(int ComputeCapabilityMajor,
int ComputeCapabilityMinor) const;
Index: parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
===================================================================
--- parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
+++ parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
@@ -30,19 +30,21 @@
const char *PTXCodeString = "Dummy PTX code";
se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}});
EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(nullptr, Spec.getCode(0, 5));
EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0));
- EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+ EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0));
}
TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
const char *PTXCodeString10 = "Dummy PTX code 10";
const char *PTXCodeString30 = "Dummy PTX code 30";
se::CUDAPTXInMemorySpec Spec(
"KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}});
EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(nullptr, Spec.getCode(0, 5));
EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0));
EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0));
- EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+ EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0));
}
TEST(CUDAFatbinInMemorySpec, BasicUsage) {
@@ -89,8 +91,9 @@
EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());
EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName());
+ EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5));
EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
- EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
+ EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName());
EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes());
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D24531.71264.patch
Type: text/x-patch
Size: 3995 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/parallel_libs-commits/attachments/20160913/b1c34899/attachment.bin>
More information about the Parallel_libs-commits mailing list