[Parallel_libs-commits] [parallel-libs] r281424 - [SE] Pack global dev handle addresses

Jason Henline via Parallel_libs-commits parallel_libs-commits at lists.llvm.org
Tue Sep 13 16:59:10 PDT 2016


Author: jhen
Date: Tue Sep 13 18:59:10 2016
New Revision: 281424

URL: http://llvm.org/viewvc/llvm-project?rev=281424&view=rev
Log:
[SE] Pack global dev handle addresses

Summary:
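We were packing global device memory handles in
`PackedKernelArgumentArray`, but as I was implementing the CUDA
platform, I realized that CUDA wants the address of the handle, not the
handle itself. So this patch switches to packing the address of the
handle, exposed through a new `getHandleAddress()` accessor on
`GlobalDeviceMemoryBase`.

This patch also removes the `PackOneArgument` overloads that took an
untyped `GlobalDeviceMemoryBase`, leaving only the typed
`GlobalDeviceMemory<T>` overloads. The HostSaxpy example wrapper and
the packing unit tests are updated to expect the handle address.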

Reviewers: jlebar

Subscribers: jprice, jlebar, parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D24528

Modified:
    parallel-libs/trunk/streamexecutor/examples/HostSaxpy.cpp
    parallel-libs/trunk/streamexecutor/include/streamexecutor/DeviceMemory.h
    parallel-libs/trunk/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h
    parallel-libs/trunk/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp

Modified: parallel-libs/trunk/streamexecutor/examples/HostSaxpy.cpp
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/examples/HostSaxpy.cpp?rev=281424&r1=281423&r2=281424&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/examples/HostSaxpy.cpp (original)
+++ parallel-libs/trunk/streamexecutor/examples/HostSaxpy.cpp Tue Sep 13 18:59:10 2016
@@ -33,8 +33,8 @@ using SaxpyKernel =
 // Wrapper function converts argument addresses to arguments.
 void SaxpyWrapper(const void *const *ArgumentAddresses) {
   Saxpy(*static_cast<const float *>(ArgumentAddresses[0]),
-        static_cast<float *>(const_cast<void *>(ArgumentAddresses[1])),
-        static_cast<float *>(const_cast<void *>(ArgumentAddresses[2])),
+        *static_cast<float **>(const_cast<void *>(ArgumentAddresses[1])),
+        *static_cast<float **>(const_cast<void *>(ArgumentAddresses[2])),
         *static_cast<const size_t *>(ArgumentAddresses[3]));
 }
 

Modified: parallel-libs/trunk/streamexecutor/include/streamexecutor/DeviceMemory.h
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/include/streamexecutor/DeviceMemory.h?rev=281424&r1=281423&r2=281424&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/include/streamexecutor/DeviceMemory.h (original)
+++ parallel-libs/trunk/streamexecutor/include/streamexecutor/DeviceMemory.h Tue Sep 13 18:59:10 2016
@@ -133,6 +133,9 @@ public:
   /// Returns an opaque handle to the underlying memory.
   const void *getHandle() const { return Handle; }
 
+  /// Returns the address of the opaque handle as stored by this object.
+  const void *const *getHandleAddress() const { return &Handle; }
+
   // Cannot copy because the handle must be owned by a single object.
   GlobalDeviceMemoryBase(const GlobalDeviceMemoryBase &) = delete;
   GlobalDeviceMemoryBase &operator=(const GlobalDeviceMemoryBase &) = delete;

Modified: parallel-libs/trunk/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h?rev=281424&r1=281423&r2=281424&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h (original)
+++ parallel-libs/trunk/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h Tue Sep 13 18:59:10 2016
@@ -164,31 +164,10 @@ private:
     Types[Index] = KernelArgumentType::VALUE;
   }
 
-  // Pack a GlobalDeviceMemoryBase argument.
-  void PackOneArgument(size_t Index, const GlobalDeviceMemoryBase &Argument) {
-    Addresses[Index] = Argument.getHandle();
-    Sizes[Index] = sizeof(void *);
-    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
-  }
-
-  // Pack a GlobalDeviceMemoryBase pointer argument.
-  void PackOneArgument(size_t Index, GlobalDeviceMemoryBase *Argument) {
-    Addresses[Index] = Argument->getHandle();
-    Sizes[Index] = sizeof(void *);
-    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
-  }
-
-  // Pack a const GlobalDeviceMemoryBase pointer argument.
-  void PackOneArgument(size_t Index, const GlobalDeviceMemoryBase *Argument) {
-    Addresses[Index] = Argument->getHandle();
-    Sizes[Index] = sizeof(void *);
-    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
-  }
-
   // Pack a GlobalDeviceMemory<T> argument.
   template <typename T>
   void PackOneArgument(size_t Index, const GlobalDeviceMemory<T> &Argument) {
-    Addresses[Index] = Argument.getHandle();
+    Addresses[Index] = Argument.getHandleAddress();
     Sizes[Index] = sizeof(void *);
     Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
   }
@@ -196,7 +175,7 @@ private:
   // Pack a GlobalDeviceMemory<T> pointer argument.
   template <typename T>
   void PackOneArgument(size_t Index, GlobalDeviceMemory<T> *Argument) {
-    Addresses[Index] = Argument->getHandle();
+    Addresses[Index] = Argument->getHandleAddress();
     Sizes[Index] = sizeof(void *);
     Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
   }
@@ -204,7 +183,7 @@ private:
   // Pack a const GlobalDeviceMemory<T> pointer argument.
   template <typename T>
   void PackOneArgument(size_t Index, const GlobalDeviceMemory<T> *Argument) {
-    Addresses[Index] = Argument->getHandle();
+    Addresses[Index] = Argument->getHandleAddress();
     Sizes[Index] = sizeof(void *);
     Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
   }

Modified: parallel-libs/trunk/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp
URL: http://llvm.org/viewvc/llvm-project/parallel-libs/trunk/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp?rev=281424&r1=281423&r2=281424&view=diff
==============================================================================
--- parallel-libs/trunk/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp (original)
+++ parallel-libs/trunk/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp Tue Sep 13 18:59:10 2016
@@ -76,7 +76,7 @@ TEST_F(DeviceMemoryPackingTest, SingleVa
 
 TEST_F(DeviceMemoryPackingTest, SingleTypedGlobal) {
   auto Array = se::make_kernel_argument_pack(TypedGlobal);
-  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
               Type::GLOBAL_DEVICE_MEMORY, Array, 0);
   EXPECT_EQ(1u, Array.getArgumentCount());
   EXPECT_EQ(0u, Array.getSharedCount());
@@ -84,7 +84,7 @@ TEST_F(DeviceMemoryPackingTest, SingleTy
 
 TEST_F(DeviceMemoryPackingTest, SingleTypedGlobalPointer) {
   auto Array = se::make_kernel_argument_pack(&TypedGlobal);
-  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
               Type::GLOBAL_DEVICE_MEMORY, Array, 0);
   EXPECT_EQ(1u, Array.getArgumentCount());
   EXPECT_EQ(0u, Array.getSharedCount());
@@ -93,7 +93,7 @@ TEST_F(DeviceMemoryPackingTest, SingleTy
 TEST_F(DeviceMemoryPackingTest, SingleConstTypedGlobalPointer) {
   const se::GlobalDeviceMemory<int> *ArgumentPointer = &TypedGlobal;
   auto Array = se::make_kernel_argument_pack(ArgumentPointer);
-  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
               Type::GLOBAL_DEVICE_MEMORY, Array, 0);
   EXPECT_EQ(1u, Array.getArgumentCount());
   EXPECT_EQ(0u, Array.getSharedCount());
@@ -131,11 +131,11 @@ TEST_F(DeviceMemoryPackingTest, PackSeve
                                              TypedGlobalPointer, TypedShared,
                                              &TypedShared, TypedSharedPointer);
   ExpectEqual(&Value, sizeof(Value), Type::VALUE, Array, 0);
-  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
               Type::GLOBAL_DEVICE_MEMORY, Array, 1);
-  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
               Type::GLOBAL_DEVICE_MEMORY, Array, 2);
-  ExpectEqual(TypedGlobal.getHandle(), sizeof(void *),
+  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
               Type::GLOBAL_DEVICE_MEMORY, Array, 3);
   ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
               Array, 4);




More information about the Parallel_libs-commits mailing list