[PATCH] D76948: [cuda][hip] Add CUDA builtin surface/texture reference support.
Michael Liao via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 27 13:10:15 PDT 2020
hliao added a comment.
This's revised change from https://reviews.llvm.org/D76365 after fixing Sema checks on the template partial specialization. With this change, I could compile the following sample code using surface reference.
kernel.cu
#include <cuda.h>
surface<void, cudaSurfaceType2D> surf;
#if defined(__clang__)
__device__ int
suld_2d_trap(surface<void, cudaSurfaceType2D>, int, int) asm("llvm.nvvm.suld.2d.i32.trap");
template <typename T>
static inline __device__ T
surf2Dread(surface<void, cudaSurfaceType2D> s, int x, int y) {
// By default, `surf2Dread` uses trap mode.
return suld_2d_trap(s, x, y);
}
#endif
__device__ int foo(int x, int y) { return surf2Dread<int>(surf, x, y); }
With NVCC, it generates
`kernel.ptx` after `nvcc --ptx -rdc=true kernel.cu`
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-27506705
// Cuda compilation tools, release 10.2, V10.2.89
// Based on LLVM 3.4svn
//
.version 6.5
.target sm_30
.address_size 64
// .globl _Z3fooii
.visible .global .surfref surf;
.visible .func (.param .b32 func_retval0) _Z3fooii(
.param .b32 _Z3fooii_param_0,
.param .b32 _Z3fooii_param_1
)
{
.reg .b32 %r<4>;
.reg .b64 %rd<2>;
ld.param.u32 %r1, [_Z3fooii_param_0];
ld.param.u32 %r2, [_Z3fooii_param_1];
suld.b.2d.b32.trap {%r3}, [surf, {%r1, %r2}];
st.param.b32 [func_retval0+0], %r3;
ret;
}
With Clang, it generates
`kernel-cuda-nvptx64-nvidia-cuda-sm_30.s` after `clang --cuda-device-only --cuda-gpu-arch=sm_30 -O2 -S kernel.cu`
//
// Generated by LLVM NVPTX Back-End
//
.version 6.4
.target sm_30
.address_size 64
// .globl _Z3fooii
.visible .global .surfref surf;
.visible .func (.param .b32 func_retval0) _Z3fooii(
.param .b32 _Z3fooii_param_0,
.param .b32 _Z3fooii_param_1
)
{
.reg .b32 %r<4>;
.reg .b64 %rd<2>;
ld.param.u32 %r1, [_Z3fooii_param_0];
ld.param.u32 %r2, [_Z3fooii_param_1];
mov.u64 %rd1, surf;
suld.b.2d.b32.trap {%r3}, [%rd1, {%r1, %r2}];
st.param.b32 [func_retval0+0], %r3;
ret;
}
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D76948/new/
https://reviews.llvm.org/D76948
More information about the cfe-commits
mailing list