[Parallel_libs-commits] [parallel-libs] r281417 - [SE] KernelSpec return best PTX
Jason Henline via Parallel_libs-commits
parallel_libs-commits at lists.llvm.org
Tue Sep 13 16:29:26 PDT 2016
Author: jhen
Date: Tue Sep 13 18:29:25 2016
New Revision: 281417
URL: http://llvm.org/viewvc/llvm-project?rev=281417&view=rev
Log:
[SE] KernelSpec return best PTX
Summary:
Before, the kernel spec would only return PTX for exactly the requested
compute capability. With this patch, it returns the PTX with the
largest compute capability that does not exceed the requested compute
capability.
Reviewers: jlebar
Subscribers: jprice, jlebar, parallel_libs-commits
Differential Revision: https://reviews.llvm.org/D24531
Modified:
parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
Modified: parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h?rev=281417&r1=281416&r2=281417&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h (original)
+++ parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h Tue Sep 13 18:29:25 2016
@@ -121,12 +121,11 @@ public:
llvm::StringRef KernelName,
const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
- /// Returns a pointer to the PTX code for the requested compute capability.
+ /// Returns a pointer to the PTX code for the greatest compute capability not
+ /// exceeding the requested compute capability.
///
- /// Returns nullptr on failed lookup (if the requested compute capability is
- /// not available). Matches exactly the specified compute capability. Doesn't
- /// try to do anything smart like finding the next best compute capability if
- /// the specified capability cannot be found.
+ /// Returns nullptr on failed lookup (if the requested version is not
+ /// available and no lower versions are available).
const char *getCode(int ComputeCapabilityMajor,
int ComputeCapabilityMinor) const;
Modified: parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp?rev=281417&r1=281416&r2=281417&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp (original)
+++ parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp Tue Sep 13 18:29:25 2016
@@ -31,12 +31,13 @@ CUDAPTXInMemorySpec::CUDAPTXInMemorySpec
const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
int ComputeCapabilityMinor) const {
- auto PTXIter =
- PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
+ auto Iterator =
+ PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{
ComputeCapabilityMajor, ComputeCapabilityMinor});
- if (PTXIter == PTXByComputeCapability.end())
+ if (Iterator == PTXByComputeCapability.begin())
return nullptr;
- return PTXIter->second;
+ --Iterator;
+ return Iterator->second;
}
CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
Modified: parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp?rev=281417&r1=281416&r2=281417&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp (original)
+++ parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp Tue Sep 13 18:29:25 2016
@@ -30,8 +30,9 @@ TEST(CUDAPTXInMemorySpec, SingleComputeC
const char *PTXCodeString = "Dummy PTX code";
se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}});
EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(nullptr, Spec.getCode(0, 5));
EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0));
- EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+ EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0));
}
TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
@@ -40,9 +41,10 @@ TEST(CUDAPTXInMemorySpec, TwoComputeCapa
se::CUDAPTXInMemorySpec Spec(
"KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}});
EXPECT_EQ("KernelName", Spec.getKernelName());
+ EXPECT_EQ(nullptr, Spec.getCode(0, 5));
EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0));
EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0));
- EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+ EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0));
}
TEST(CUDAFatbinInMemorySpec, BasicUsage) {
@@ -89,8 +91,9 @@ TEST(MultiKernelLoaderSpec, Registration
EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());
EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName());
+ EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5));
EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
- EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
+ EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName());
EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes());
More information about the Parallel_libs-commits
mailing list