[Openmp-dev] [EXTERNAL] Handling vectors with omp_target_alloc_shared()

Huber, Joseph via Openmp-dev openmp-dev at lists.llvm.org
Mon Oct 18 06:57:24 PDT 2021


This certainly bypasses is_device_ptr​, but it's the same operation fundamentally. If you use LIBOMPTARGET_INFO​=1 you can see that in both cases a 64-bit variable is being copied by-value to the device.
________________________________
From: Itaru Kitayama <itaru.kitayama at gmail.com>
Sent: Sunday, October 17, 2021 3:53 AM
To: Huber, Joseph <huberjn at ornl.gov>
Cc: openmp-dev <openmp-dev at lists.llvm.org>
Subject: Re: [EXTERNAL] [Openmp-dev] Handling vectors with omp_target_alloc_shared()

This worked for me:

#include <cstdlib>
#include <new>
#include <limits>
#include <iostream>
#include <vector>
#include <omp.h>
#include <array>
#include <memory_resource>

extern "C" {
  void* llvm_omp_target_alloc_shared(size_t, int);
}

template <class T>
struct Mallocator
{
  typedef T value_type;

  Mallocator () = default;
  template <class U> constexpr Mallocator (const Mallocator <U>&) noexcept {}

  [[nodiscard]] T* allocate(std::size_t n) {
    if (n > std::numeric_limits<std::size_t>::max() / sizeof(T))
      throw std::bad_array_new_length();

    //if (auto p = static_cast<T*>(std::malloc(n*sizeof(T)))) {
    //if (T* p =
static_cast<T*>(llvm_omp_target_alloc_shared(n*sizeof(T), 0))) {
    if (std::uintptr_t p =
(std::uintptr_t)llvm_omp_target_alloc_shared(n*sizeof(T), 0)) {
      //report(p, n);
      return reinterpret_cast<T*>(p);
    }

    throw std::bad_alloc();
  }

  void deallocate(T* p, std::size_t n) noexcept {
    //report(p, n, 0);
    //std::free(p);
    //omp_target_free(p, 0);
  }

private:
  void report(T* p, std::size_t n, bool alloc = true) const {
    std::cout << (alloc ? "Alloc: " : "Dealloc: ") << sizeof(T)*n
      << " bytes at " << std::hex << std::showbase
      << reinterpret_cast<void*>(p) << std::dec << '\n';
  }
};

template <class T, class U>
bool operator==(const Mallocator <T>&, const Mallocator <U>&) { return true; }
template <class T, class U>
bool operator!=(const Mallocator <T>&, const Mallocator <U>&) { return false; }

//typedef std::vector<int, Mallocator<int> > vector<int>;
class A {
public:
     A();
     double get() {
       return v_[0][0];
     };

private:
   //std::vector<std::array<double,3>, Mallocator< std::array<double,
3> > > v_ = std::vector<std::array<double, 3>,
Mallocator<std::array<double, 3> > >(1024);
   std::vector<std::array<double,3>, Mallocator< std::array<double, 3> > > v_;
};

A::A() {

  v_.reserve(1024);
  for (int i=0;i<1024;i++)
     v_[i].fill(123.4);

}

int main()
{
  std:uintptr_t p = (std::uintptr_t)llvm_omp_target_alloc_shared(sizeof(A), 0);
  new (reinterpret_cast<A*>(p)) A();
#pragma omp target parallel for
  for (int i=0;i < 1024;i++) {
   printf("%f\n", reinterpret_cast<A*>(p)->get());
  }

}

On Fri, Oct 15, 2021 at 8:57 AM Huber, Joseph <huberjn at ornl.gov> wrote:
>
> The pointer value should be copied to the kernel by value, I wouldn't think it would have a large amount of overhead. I don't think there's any other way to tell the runtime to use a device pointer instead of trying to create one using the mapping table. You could try using a single large pointer to a memory region and extracting it from there.
> ________________________________
> From: Itaru Kitayama <itaru.kitayama at gmail.com>
> Sent: Thursday, October 14, 2021 5:30 PM
> To: Huber, Joseph <huberjn at ornl.gov>
> Cc: openmp-dev <openmp-dev at lists.llvm.org>
> Subject: Re: [EXTERNAL] [Openmp-dev] Handling vectors with omp_target_alloc_shared()
>
> Joseph
>
> I’d like to avoid using the is_device_pre clause as my app deals with a neural network consisting of tens of thousands of neurons.
>
> Itaru.
>
> On Fri, Oct 15, 2021 at 2:18 Huber, Joseph <huberjn at ornl.gov> wrote:
>
> You need to specify is_device_ptr in your region so the runtime knows to just copy the pointer into the device. Otherwise, it will assume it is on the host and try to look it up in the device mapping table and get nothing. You can use env LIBOMPTARGET_INFO=-1 to see the difference in the assumed mapping type.
> ________________________________
> From: Openmp-dev <openmp-dev-bounces at lists.llvm.org> on behalf of Itaru Kitayama via Openmp-dev <openmp-dev at lists.llvm.org>
> Sent: Thursday, October 14, 2021 2:37 AM
> To: openmp-dev <openmp-dev at lists.llvm.org>
> Subject: [EXTERNAL] [Openmp-dev] Handling vectors with omp_target_alloc_shared()
>
> Hi,
> My test code below works on host, but not on device, could someone
> give me advice if there's a workaround for this?
>
> #include <cstdlib>
> #include <new>
> #include <limits>
> #include <iostream>
> #include <vector>
> #include <omp.h>
> #include <array>
> #include <memory_resource>
>
> extern "C" {
>   void* llvm_omp_target_alloc_shared(size_t, int);
> }
>
> template <class T>
> struct Mallocator
> {
>   typedef T value_type;
>
>   Mallocator () = default;
>   template <class U> constexpr Mallocator (const Mallocator <U>&) noexcept {}
>
>   [[nodiscard]] T* allocate(std::size_t n) {
>     if (n > std::numeric_limits<std::size_t>::max() / sizeof(T))
>       throw std::bad_array_new_length();
>
>     //if (auto p = static_cast<T*>(std::malloc(n*sizeof(T)))) {
>     //if (T* p =
> static_cast<T*>(llvm_omp_target_alloc_shared(n*sizeof(T), 0))) {
>     if (std::uintptr_t p =
> (std::uintptr_t)llvm_omp_target_alloc_shared(n*sizeof(T), 0)) {
>       //report(p, n);
>       return reinterpret_cast<T*>(p);
>     }
>
>     throw std::bad_alloc();
>   }
>
>   void deallocate(T* p, std::size_t n) noexcept {
>     //report(p, n, 0);
>     //std::free(p);
>     //omp_target_free(p, 0);
>   }
>
> private:
>   void report(T* p, std::size_t n, bool alloc = true) const {
>     std::cout << (alloc ? "Alloc: " : "Dealloc: ") << sizeof(T)*n
>       << " bytes at " << std::hex << std::showbase
>       << reinterpret_cast<void*>(p) << std::dec << '\n';
>   }
> };
>
> template <class T, class U>
> bool operator==(const Mallocator <T>&, const Mallocator <U>&) { return true; }
> template <class T, class U>
> bool operator!=(const Mallocator <T>&, const Mallocator <U>&) { return false; }
>
> //typedef std::vector<int, Mallocator<int> > vector<int>;
> class A {
> public:
>      A();
>      double get() {
>        return this->v_[0][0];
>      };
>      int f() { return reinterpret_cast<int*>(array_)[0]; };
>
> private:
>    std::vector<std::array<double,3>, Mallocator< std::array<double, 3> > > v_;
>    //std::vector<double, Mallocator< double> > v_;
>    std::uintptr_t array_;
> };
>
> A::A() {
>
>   v_.reserve(1024);
>   for (int i=0;i<1024;i++)
>      v_[i].fill(123.4);
>
>
> }
>
> int main()
> {
>   A *p = (A*)llvm_omp_target_alloc_shared(sizeof(A), 0);
>   new (p) A();
>
>   std::vector<std::array<double, 3>, Mallocator<std::array<double,
> 3>>> myv(1024);
>   myv[0].fill(0.256);
> #pragma omp target parallel for
>   for (int i=0;i < 1024;i++) {
>    printf("%f\n", p->get());
>    //printf("%f\n", myv[0][0]);
>   }
>
> }
> _______________________________________________
> Openmp-dev mailing list
> Openmp-dev at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/openmp-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/openmp-dev/attachments/20211018/2bc44625/attachment-0001.html>


More information about the Openmp-dev mailing list