[Parallel_libs-commits] [parallel-libs] r281417 - [SE] KernelSpec return best PTX

Jason Henline via Parallel_libs-commits parallel_libs-commits at lists.llvm.org
Tue Sep 13 16:29:26 PDT 2016


Author: jhen
Date: Tue Sep 13 18:29:25 2016
New Revision: 281417

URL: http://llvm.org/viewvc/llvm-project?rev=281417&view=rev
Log:
[SE] KernelSpec return best PTX

Summary:
Before, the kernel spec would only return PTX for exactly the requested
compute capability. With this patch it will now return the PTX with the
largest compute capability that does not exceed that requested compute
capability.

Reviewers: jlebar

Subscribers: jprice, jlebar, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D24531

Modified:
    parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
    parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
    parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp

Modified: parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h?rev=281417&r1=281416&r2=281417&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h (original)
+++ parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h Tue Sep 13 18:29:25 2016
@@ -121,12 +121,11 @@ public:
       llvm::StringRef KernelName,
       const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
 
-  /// Returns a pointer to the PTX code for the requested compute capability.
+  /// Returns a pointer to the PTX code for the greatest compute capability not
+  /// exceeding the requested compute capability.
   ///
-  /// Returns nullptr on failed lookup (if the requested compute capability is
-  /// not available). Matches exactly the specified compute capability. Doesn't
-  /// try to do anything smart like finding the next best compute capability if
-  /// the specified capability cannot be found.
+  /// Returns nullptr on failed lookup (if the requested version is not
+  /// available and no lower versions are available).
   const char *getCode(int ComputeCapabilityMajor,
                       int ComputeCapabilityMinor) const;
 

Modified: parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp?rev=281417&r1=281416&r2=281417&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp (original)
+++ parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp Tue Sep 13 18:29:25 2016
@@ -31,12 +31,13 @@ CUDAPTXInMemorySpec::CUDAPTXInMemorySpec
 
 const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
                                          int ComputeCapabilityMinor) const {
-  auto PTXIter =
-      PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
+  auto Iterator =
+      PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{
           ComputeCapabilityMajor, ComputeCapabilityMinor});
-  if (PTXIter == PTXByComputeCapability.end())
+  if (Iterator == PTXByComputeCapability.begin())
     return nullptr;
-  return PTXIter->second;
+  --Iterator;
+  return Iterator->second;
 }
 
 CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,

Modified: parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp?rev=281417&r1=281416&r2=281417&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp (original)
+++ parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp Tue Sep 13 18:29:25 2016
@@ -30,8 +30,9 @@ TEST(CUDAPTXInMemorySpec, SingleComputeC
   const char *PTXCodeString = "Dummy PTX code";
   se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}});
   EXPECT_EQ("KernelName", Spec.getKernelName());
+  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
   EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0));
-  EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+  EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0));
 }
 
 TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
@@ -40,9 +41,10 @@ TEST(CUDAPTXInMemorySpec, TwoComputeCapa
   se::CUDAPTXInMemorySpec Spec(
       "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}});
   EXPECT_EQ("KernelName", Spec.getKernelName());
+  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
   EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0));
   EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0));
-  EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+  EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0));
 }
 
 TEST(CUDAFatbinInMemorySpec, BasicUsage) {
@@ -89,8 +91,9 @@ TEST(MultiKernelLoaderSpec, Registration
   EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());
 
   EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName());
+  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5));
   EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
-  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
+  EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
 
   EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName());
   EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes());




More information about the Parallel_libs-commits mailing list