[PATCH] D76365: [cuda][hip] Add CUDA builtin surface/texture reference support.
Michael Liao via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 19 15:24:35 PDT 2020
hliao added a comment.
With this revision, the following sample could be compiled with CUDA SDK and almost the same PTX code is generated.
#include <cuda.h>
texture<float, cudaTextureType2D, cudaReadModeElementType> tex;
#if defined(__clang__)
struct v4f {
float x, y, z, w;
};
__device__ v4f
tex_2d_ld(texture<float, cudaTextureType2D, cudaReadModeElementType>,
float, float) asm("llvm.nvvm.tex.unified.2d.v4f32.f32");
template <typename T>
static inline __device__ T
tex2D(texture<T, cudaTextureType2D, cudaReadModeElementType> t,
float x, float y) {
return tex_2d_ld(t, x, y).x;
}
#endif
__device__ float foo(float x, float y) { return tex2D(tex, x, y); }
Note that, clang-based one needs defining texture fetch functions as they could not be reused from CUDA SDK. That part is enclosed with `#if defined(__clang__)`.
Here's the PTX code generated from NVCC. ``
kernel.ptx
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-27506705
// Cuda compilation tools, release 10.2, V10.2.89
// Based on LLVM 3.4svn
//
.version 6.5
.target sm_30
.address_size 64
// .globl _Z3fooff
.visible .global .texref tex;
.visible .func (.param .b32 func_retval0) _Z3fooff(
.param .b32 _Z3fooff_param_0,
.param .b32 _Z3fooff_param_1
)
{
.reg .f32 %f<7>;
.reg .b64 %rd<2>;
ld.param.f32 %f1, [_Z3fooff_param_0];
ld.param.f32 %f2, [_Z3fooff_param_1];
tex.2d.v4.f32.f32 {%f3, %f4, %f5, %f6}, [tex, {%f1, %f2}];
st.param.f32 [func_retval0+0], %f3;
ret;
}
Here's the PTX code generated from Clang and LLVM backend. `clang --cuda-device-only --cuda-gpu-arch=sm_30 -O2 -S kernel.cu`
kernel-cuda-nvptx64-nvidia-cuda-sm_30.s
//
// Generated by LLVM NVPTX Back-End
//
.version 6.4
.target sm_30
.address_size 64
// .globl _Z3fooff
.visible .global .texref tex;
.visible .func (.param .b32 func_retval0) _Z3fooff(
.param .b32 _Z3fooff_param_0,
.param .b32 _Z3fooff_param_1
)
{
.reg .f32 %f<7>;
.reg .b64 %rd<2>;
ld.param.f32 %f1, [_Z3fooff_param_0];
ld.param.f32 %f2, [_Z3fooff_param_1];
mov.u64 %rd1, tex;
tex.2d.v4.f32.f32 {%f3, %f4, %f5, %f6}, [%rd1, {%f1, %f2}];
st.param.f32 [func_retval0+0], %f3;
ret;
}
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D76365/new/
https://reviews.llvm.org/D76365
More information about the cfe-commits
mailing list