[Parallel_libs-commits] [PATCH] D24531: [SE] KernelSpec return best PTX

Jason Henline via Parallel_libs-commits parallel_libs-commits at lists.llvm.org
Tue Sep 13 16:24:05 PDT 2016


jhen created this revision.
jhen added a reviewer: jlebar.
jhen added subscribers: parallel_libs-commits, jprice.
Herald added a subscriber: jlebar.

Before, the kernel spec would only return PTX for exactly the requested
compute capability. With this patch, it now returns the PTX with the
largest compute capability that does not exceed the requested compute
capability, or nullptr if no such PTX is available.

https://reviews.llvm.org/D24531

Files:
  streamexecutor/include/streamexecutor/KernelSpec.h
  streamexecutor/lib/KernelSpec.cpp
  streamexecutor/unittests/CoreTests/KernelSpecTest.cpp

Index: streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
===================================================================
--- streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
+++ streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
@@ -30,19 +30,21 @@
   const char *PTXCodeString = "Dummy PTX code";
   se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}});
   EXPECT_EQ("KernelName", Spec.getKernelName());
+  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
   EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0));
-  EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+  EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0));
 }
 
 TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
   const char *PTXCodeString10 = "Dummy PTX code 10";
   const char *PTXCodeString30 = "Dummy PTX code 30";
   se::CUDAPTXInMemorySpec Spec(
       "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}});
   EXPECT_EQ("KernelName", Spec.getKernelName());
+  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
   EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0));
   EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0));
-  EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+  EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0));
 }
 
 TEST(CUDAFatbinInMemorySpec, BasicUsage) {
@@ -89,8 +91,9 @@
   EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());
 
   EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName());
+  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5));
   EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
-  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
+  EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
 
   EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName());
   EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes());
Index: streamexecutor/lib/KernelSpec.cpp
===================================================================
--- streamexecutor/lib/KernelSpec.cpp
+++ streamexecutor/lib/KernelSpec.cpp
@@ -31,12 +31,13 @@
 
 const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
                                          int ComputeCapabilityMinor) const {
-  auto PTXIter =
-      PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
+  auto Iterator =
+      PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{
           ComputeCapabilityMajor, ComputeCapabilityMinor});
-  if (PTXIter == PTXByComputeCapability.end())
+  if (Iterator == PTXByComputeCapability.begin())
     return nullptr;
-  return PTXIter->second;
+  --Iterator;
+  return Iterator->second;
 }
 
 CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
Index: streamexecutor/include/streamexecutor/KernelSpec.h
===================================================================
--- streamexecutor/include/streamexecutor/KernelSpec.h
+++ streamexecutor/include/streamexecutor/KernelSpec.h
@@ -121,12 +121,11 @@
       llvm::StringRef KernelName,
       const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
 
-  /// Returns a pointer to the PTX code for the requested compute capability.
+  /// Returns a pointer to the PTX code for the greatest compute capability not
+  /// exceeding the requested compute capability.
   ///
-  /// Returns nullptr on failed lookup (if the requested compute capability is
-  /// not available). Matches exactly the specified compute capability. Doesn't
-  /// try to do anything smart like finding the next best compute capability if
-  /// the specified capability cannot be found.
+  /// Returns nullptr on failed lookup (if the requested version is not
+  /// available and no lower versions are available).
   const char *getCode(int ComputeCapabilityMajor,
                       int ComputeCapabilityMinor) const;
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D24531.71258.patch
Type: text/x-patch
Size: 3815 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/parallel_libs-commits/attachments/20160913/db6ced7f/attachment.bin>


More information about the Parallel_libs-commits mailing list