[PATCH] D76365: [cuda][hip] Add CUDA builtin surface/texture reference support.

Michael Liao via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 19 15:24:35 PDT 2020


hliao added a comment.

With this revision, the following sample could be compiled with CUDA SDK and almost the same PTX code is generated.

  #include <cuda.h>
  
  texture<float, cudaTextureType2D, cudaReadModeElementType> tex;
  
  #if defined(__clang__)
  struct v4f {
    float x, y, z, w;
  };
  __device__ v4f
  tex_2d_ld(texture<float, cudaTextureType2D, cudaReadModeElementType>,
            float, float) asm("llvm.nvvm.tex.unified.2d.v4f32.f32");
  
  template <typename T>
  static inline __device__ T
  tex2D(texture<T, cudaTextureType2D, cudaReadModeElementType> t,
        float x, float y) {
    return tex_2d_ld(t, x, y).x;
  }
  #endif
  
  __device__ float foo(float x, float y) { return tex2D(tex, x, y); }

Note that, clang-based one needs defining texture fetch functions as they could not be reused from CUDA SDK. That part is enclosed with `#if defined(__clang__)`.

Here's the PTX code generated from NVCC. ``

  kernel.ptx
  //
  // Generated by NVIDIA NVVM Compiler
  //
  // Compiler Build ID: CL-27506705
  // Cuda compilation tools, release 10.2, V10.2.89
  // Based on LLVM 3.4svn
  //
  
  .version 6.5
  .target sm_30
  .address_size 64
  
          // .globl       _Z3fooff
  .visible .global .texref tex;
  
  .visible .func  (.param .b32 func_retval0) _Z3fooff(
          .param .b32 _Z3fooff_param_0,
          .param .b32 _Z3fooff_param_1
  )
  {
          .reg .f32       %f<7>;
          .reg .b64       %rd<2>;
  
  
          ld.param.f32    %f1, [_Z3fooff_param_0];
          ld.param.f32    %f2, [_Z3fooff_param_1];
          tex.2d.v4.f32.f32       {%f3, %f4, %f5, %f6}, [tex, {%f1, %f2}];
          st.param.f32    [func_retval0+0], %f3;
          ret;
  }

Here's the PTX code generated from Clang and LLVM backend. `clang --cuda-device-only --cuda-gpu-arch=sm_30 -O2 -S kernel.cu`

  kernel-cuda-nvptx64-nvidia-cuda-sm_30.s
  //
  // Generated by LLVM NVPTX Back-End
  //
  
  .version 6.4
  .target sm_30
  .address_size 64
  
          // .globl       _Z3fooff
  .visible .global .texref tex;
  
  .visible .func  (.param .b32 func_retval0) _Z3fooff(
          .param .b32 _Z3fooff_param_0,
          .param .b32 _Z3fooff_param_1
  )
  {
          .reg .f32       %f<7>;
          .reg .b64       %rd<2>;
  
          ld.param.f32    %f1, [_Z3fooff_param_0];
          ld.param.f32    %f2, [_Z3fooff_param_1];
          mov.u64         %rd1, tex;
          tex.2d.v4.f32.f32       {%f3, %f4, %f5, %f6}, [%rd1, {%f1, %f2}];
          st.param.f32    [func_retval0+0], %f3;
          ret;
  
  }


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76365/new/

https://reviews.llvm.org/D76365





More information about the cfe-commits mailing list