[PATCH] D76948: [cuda][hip] Add CUDA builtin surface/texture reference support.

Michael Liao via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 27 13:10:15 PDT 2020


hliao added a comment.

This's revised change from https://reviews.llvm.org/D76365 after fixing Sema checks on the template partial specialization. With this change, I could compile the following sample code using surface reference.

kernel.cu

  #include <cuda.h>
  
  surface<void, cudaSurfaceType2D> surf;
  
  #if defined(__clang__)
  __device__ int
  suld_2d_trap(surface<void, cudaSurfaceType2D>, int, int) asm("llvm.nvvm.suld.2d.i32.trap");
  
  template <typename T>
  static inline __device__ T
  surf2Dread(surface<void, cudaSurfaceType2D> s, int x, int y) {
    // By default, `surf2Dread` uses trap mode.
    return suld_2d_trap(s, x, y);
  }
  #endif
  
  __device__ int foo(int x, int y) { return surf2Dread<int>(surf, x, y); }

With NVCC, it generates

`kernel.ptx` after `nvcc --ptx -rdc=true kernel.cu`

  //
  // Generated by NVIDIA NVVM Compiler
  //
  // Compiler Build ID: CL-27506705
  // Cuda compilation tools, release 10.2, V10.2.89
  // Based on LLVM 3.4svn
  //
  
  .version 6.5
  .target sm_30
  .address_size 64
  
          // .globl       _Z3fooii
  .visible .global .surfref surf;
  
  .visible .func  (.param .b32 func_retval0) _Z3fooii(
          .param .b32 _Z3fooii_param_0,
          .param .b32 _Z3fooii_param_1
  )
  {
          .reg .b32       %r<4>;
          .reg .b64       %rd<2>;
  
  
          ld.param.u32    %r1, [_Z3fooii_param_0];
          ld.param.u32    %r2, [_Z3fooii_param_1];
          suld.b.2d.b32.trap {%r3}, [surf, {%r1, %r2}];
          st.param.b32    [func_retval0+0], %r3;
          ret;
  }

With Clang, it generates

`kernel-cuda-nvptx64-nvidia-cuda-sm_30.s` after `clang --cuda-device-only --cuda-gpu-arch=sm_30 -O2 -S kernel.cu`

  //
  // Generated by LLVM NVPTX Back-End
  //
  
  .version 6.4
  .target sm_30
  .address_size 64
  
          // .globl       _Z3fooii
  .visible .global .surfref surf;
  
  .visible .func  (.param .b32 func_retval0) _Z3fooii(
          .param .b32 _Z3fooii_param_0,
          .param .b32 _Z3fooii_param_1
  )
  {
          .reg .b32       %r<4>;
          .reg .b64       %rd<2>;
  
          ld.param.u32    %r1, [_Z3fooii_param_0];
          ld.param.u32    %r2, [_Z3fooii_param_1];
          mov.u64         %rd1, surf;
          suld.b.2d.b32.trap {%r3}, [%rd1, {%r1, %r2}];
          st.param.b32    [func_retval0+0], %r3;
          ret;
  
  }


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76948/new/

https://reviews.llvm.org/D76948





More information about the cfe-commits mailing list