[PATCH] D76948: [cuda][hip] Add CUDA builtin surface/texture reference support.
Michael Liao via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 27 13:10:14 PDT 2020
hliao created this revision.
hliao added reviewers: tra, rjmccall, yaxunl.
Herald added a reviewer: a.sidorin.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
hliao added a comment.
This's revised change from https://reviews.llvm.org/D76365 after fixing Sema checks on the template partial specialization. With this change, I could compile the following sample code using surface reference.
kernel.cu
#include <cuda.h>
surface<void, cudaSurfaceType2D> surf;
#if defined(__clang__)
__device__ int
suld_2d_trap(surface<void, cudaSurfaceType2D>, int, int) asm("llvm.nvvm.suld.2d.i32.trap");
template <typename T>
static inline __device__ T
surf2Dread(surface<void, cudaSurfaceType2D> s, int x, int y) {
// By default, `surf2Dread` uses trap mode.
return suld_2d_trap(s, x, y);
}
#endif
__device__ int foo(int x, int y) { return surf2Dread<int>(surf, x, y); }
With NVCC, it generates
`kernel.ptx` after `nvcc --ptx -rdc=true kernel.cu`
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-27506705
// Cuda compilation tools, release 10.2, V10.2.89
// Based on LLVM 3.4svn
//
.version 6.5
.target sm_30
.address_size 64
// .globl _Z3fooii
.visible .global .surfref surf;
.visible .func (.param .b32 func_retval0) _Z3fooii(
.param .b32 _Z3fooii_param_0,
.param .b32 _Z3fooii_param_1
)
{
.reg .b32 %r<4>;
.reg .b64 %rd<2>;
ld.param.u32 %r1, [_Z3fooii_param_0];
ld.param.u32 %r2, [_Z3fooii_param_1];
suld.b.2d.b32.trap {%r3}, [surf, {%r1, %r2}];
st.param.b32 [func_retval0+0], %r3;
ret;
}
With Clang, it generates
`kernel-cuda-nvptx64-nvidia-cuda-sm_30.s` after `clang --cuda-device-only --cuda-gpu-arch=sm_30 -O2 -S kernel.cu`
//
// Generated by LLVM NVPTX Back-End
//
.version 6.4
.target sm_30
.address_size 64
// .globl _Z3fooii
.visible .global .surfref surf;
.visible .func (.param .b32 func_retval0) _Z3fooii(
.param .b32 _Z3fooii_param_0,
.param .b32 _Z3fooii_param_1
)
{
.reg .b32 %r<4>;
.reg .b64 %rd<2>;
ld.param.u32 %r1, [_Z3fooii_param_0];
ld.param.u32 %r2, [_Z3fooii_param_1];
mov.u64 %rd1, surf;
suld.b.2d.b32.trap {%r3}, [%rd1, {%r1, %r2}];
st.param.b32 [func_retval0+0], %r3;
ret;
}
- Re-commit after fix Sema checks on partial template specialization.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D76948
Files:
clang/include/clang/AST/Type.h
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/lib/AST/Type.cpp
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/CodeGen/CGCUDARuntime.h
clang/lib/CodeGen/CGExprAgg.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenTypes.cpp
clang/lib/CodeGen/TargetInfo.cpp
clang/lib/CodeGen/TargetInfo.h
clang/lib/Headers/__clang_cuda_runtime_wrapper.h
clang/lib/Sema/SemaDeclAttr.cpp
clang/lib/Sema/SemaDeclCXX.cpp
clang/test/CodeGenCUDA/surface.cu
clang/test/CodeGenCUDA/texture.cu
clang/test/Misc/pragma-attribute-supported-attributes-list.test
clang/test/SemaCUDA/attr-declspec.cu
clang/test/SemaCUDA/attributes-on-non-cuda.cu
clang/test/SemaCUDA/bad-attributes.cu
llvm/include/llvm/IR/Operator.h
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D76948.253201.patch
Type: text/x-patch
Size: 46338 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20200327/41100379/attachment-0001.bin>
More information about the cfe-commits
mailing list