[Parallel_libs-commits] [PATCH] D24531: [SE] KernelSpec return best PTX

Jason Henline via Parallel_libs-commits parallel_libs-commits at lists.llvm.org
Tue Sep 13 16:37:57 PDT 2016


This revision was automatically updated to reflect the committed changes.
Closed by commit rL281417: [SE] KernelSpec return best PTX (authored by jhen).

Changed prior to commit:
  https://reviews.llvm.org/D24531?vs=71258&id=71264#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D24531

Files:
  parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
  parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
  parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp

Index: parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
===================================================================
--- parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
+++ parallel-libs/trunk/streamexecutor/lib/KernelSpec.cpp
@@ -31,12 +31,13 @@
 
 const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
                                          int ComputeCapabilityMinor) const {
-  auto PTXIter =
-      PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
+  auto Iterator =
+      PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{
           ComputeCapabilityMajor, ComputeCapabilityMinor});
-  if (PTXIter == PTXByComputeCapability.end())
+  if (Iterator == PTXByComputeCapability.begin())
     return nullptr;
-  return PTXIter->second;
+  --Iterator;
+  return Iterator->second;
 }
 
 CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
Index: parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
===================================================================
--- parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
+++ parallel-libs/trunk/streamexecutor/include/streamexecutor/KernelSpec.h
@@ -121,12 +121,11 @@
       llvm::StringRef KernelName,
       const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
 
-  /// Returns a pointer to the PTX code for the requested compute capability.
+  /// Returns a pointer to the PTX code for the greatest compute capability not
+  /// exceeding the requested compute capability.
   ///
-  /// Returns nullptr on failed lookup (if the requested compute capability is
-  /// not available). Matches exactly the specified compute capability. Doesn't
-  /// try to do anything smart like finding the next best compute capability if
-  /// the specified capability cannot be found.
+  /// Returns nullptr on failed lookup (if the requested version is not
+  /// available and no lower versions are available).
   const char *getCode(int ComputeCapabilityMajor,
                       int ComputeCapabilityMinor) const;
 
Index: parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
===================================================================
--- parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
+++ parallel-libs/trunk/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
@@ -30,19 +30,21 @@
   const char *PTXCodeString = "Dummy PTX code";
   se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}});
   EXPECT_EQ("KernelName", Spec.getKernelName());
+  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
   EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0));
-  EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+  EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0));
 }
 
 TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
   const char *PTXCodeString10 = "Dummy PTX code 10";
   const char *PTXCodeString30 = "Dummy PTX code 30";
   se::CUDAPTXInMemorySpec Spec(
       "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}});
   EXPECT_EQ("KernelName", Spec.getKernelName());
+  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
   EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0));
   EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0));
-  EXPECT_EQ(nullptr, Spec.getCode(2, 0));
+  EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0));
 }
 
 TEST(CUDAFatbinInMemorySpec, BasicUsage) {
@@ -89,8 +91,9 @@
   EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());
 
   EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName());
+  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5));
   EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
-  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
+  EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
 
   EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName());
   EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes());


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D24531.71264.patch
Type: text/x-patch
Size: 3995 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/parallel_libs-commits/attachments/20160913/b1c34899/attachment.bin>


More information about the Parallel_libs-commits mailing list