[clang] cuda clang: Add support for CUDA surfaces (PR #132883)

Sat Mar 29 19:32:01 PDT 2025

https://github.com/AustinSchuh updated https://github.com/llvm/llvm-project/pull/132883

>From d8ffb5acbd79869476c91433f85488f3088e38fd Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Mon, 24 Mar 2025 21:42:35 -0700
Subject: [PATCH 1/7] Add support for CUDA surfaces

This adds support for all the surface read and write calls to clang.
It extends the pattern used for textures to surfaces too.

I tested this by generating all the various permutations of the calls
and argument types in a Python script, compiling them with both clang
and nvcc, and comparing the generated PTX for equivalence.  They all
agree, ignoring register allocation and some places where Clang emits
different memory writes.  An example kernel is:

__global__ void testKernel(cudaSurfaceObject_t surfObj, int x, float2* result)
{
    *result = surf1Dread<float2>(surfObj, x, cudaBoundaryModeZero);
}

Signed-off-by: Austin Schuh <austin.linux at gmail.com>
---
 .../Headers/__clang_cuda_runtime_wrapper.h    |   1 +
 .../Headers/__clang_cuda_texture_intrinsics.h | 419 +++++++++++++++++-
 2 files changed, 418 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index d369c86fe1064..8182c961ec32f 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -386,6 +386,7 @@ __host__ __device__ void __nv_tex_surf_handler(const char *name, T *ptr,
 #endif // __cplusplus >= 201103L && CUDA_VERSION >= 9000
 #include "texture_fetch_functions.h"
 #include "texture_indirect_functions.h"
+#include "surface_indirect_functions.h"
 
 // Restore state of __CUDA_ARCH__ and __THROW we had on entry.
 #pragma pop_macro("__CUDA_ARCH__")
diff --git a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
index a71952211237b..2ea83f66036d4 100644
--- a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
@@ -28,6 +28,7 @@
 #pragma push_macro("__Args")
 #pragma push_macro("__ID")
 #pragma push_macro("__IDV")
+#pragma push_macro("__OP_TYPE_SURFACE")
 #pragma push_macro("__IMPL_2DGATHER")
 #pragma push_macro("__IMPL_ALIAS")
 #pragma push_macro("__IMPL_ALIASI")
@@ -45,6 +46,64 @@
 #pragma push_macro("__IMPL_SI")
 #pragma push_macro("__L")
 #pragma push_macro("__STRIP_PARENS")
+#pragma push_macro("__SURF_WRITE_V2")
+#pragma push_macro("__SW_ASM_ARGS")
+#pragma push_macro("__SW_ASM_ARGS1")
+#pragma push_macro("__SW_ASM_ARGS2")
+#pragma push_macro("__SW_ASM_ARGS4")
+#pragma push_macro("__SURF_WRITE_V2")
+#pragma push_macro("__SURF_READ_V2")
+#pragma push_macro("__SW_ASM_ARGS")
+#pragma push_macro("__SW_ASM_ARGS1")
+#pragma push_macro("__SW_ASM_ARGS2")
+#pragma push_macro("__SW_ASM_ARGS4")
+#pragma push_macro("__SURF_READ1D")
+#pragma push_macro("__SURF_READ2D")
+#pragma push_macro("__SURF_READ3D")
+#pragma push_macro("__SURF_READ1DLAYERED")
+#pragma push_macro("__SURF_READ2DLAYERED")
+#pragma push_macro("__SURF_READCUBEMAP")
+#pragma push_macro("__SURF_READCUBEMAPLAYERED")
+#pragma push_macro("__1DV1")
+#pragma push_macro("__1DV2")
+#pragma push_macro("__1DV4")
+#pragma push_macro("__2DV1")
+#pragma push_macro("__2DV2")
+#pragma push_macro("__2DV4")
+#pragma push_macro("__1DLAYERV1")
+#pragma push_macro("__1DLAYERV2")
+#pragma push_macro("__1DLAYERV4")
+#pragma push_macro("__3DV1")
+#pragma push_macro("__3DV2")
+#pragma push_macro("__3DV4")
+#pragma push_macro("__2DLAYERV1")
+#pragma push_macro("__2DLAYERV2")
+#pragma push_macro("__2DLAYERV4")
+#pragma push_macro("__CUBEMAPV1")
+#pragma push_macro("__CUBEMAPV2")
+#pragma push_macro("__CUBEMAPV4")
+#pragma push_macro("__CUBEMAPLAYERV1")
+#pragma push_macro("__CUBEMAPLAYERV2")
+#pragma push_macro("__CUBEMAPLAYERV4")
+#pragma push_macro("__SURF_READXD_ALL")
+#pragma push_macro("__SURF_WRITE1D_V2")
+#pragma push_macro("__SURF_WRITE1DLAYERED_V2")
+#pragma push_macro("__SURF_WRITE2D_V2")
+#pragma push_macro("__SURF_WRITE2DLAYERED_V2")
+#pragma push_macro("__SURF_WRITE3D_V2")
+#pragma push_macro("__SURF_CUBEMAPWRITE_V2")
+#pragma push_macro("__SURF_CUBEMAPLAYEREDWRITE_V2")
+#pragma push_macro("__SURF_WRITEXD_V2_ALL")
+#pragma push_macro("__1DV1")
+#pragma push_macro("__1DV2")
+#pragma push_macro("__1DV4")
+#pragma push_macro("__2DV1")
+#pragma push_macro("__2DV2")
+#pragma push_macro("__2DV4")
+#pragma push_macro("__3DV1")
+#pragma push_macro("__3DV2")
+#pragma push_macro("__3DV4")
+
 
 // Put all functions into anonymous namespace so they have internal linkage.
 // The device-only function here must be internal in order to avoid ODR
@@ -186,6 +245,20 @@ template <class __T> struct __TypeInfoT {
   using __fetch_t = typename __TypeInfoT<__base_t>::__fetch_t;
 };
 
+// Tag structs to distinguish operation types
+struct __texture_op_tag {};
+struct __surface_op_tag {};
+
+// Primary template: every operation ID maps to the texture tag unless it is specialized via __OP_TYPE_SURFACE.
+template <class __op>
+struct __op_type_traits {
+  using type = __texture_op_tag;
+};
+
+// Specialize for known surface operation tags.  No trailing ';' here: each use site supplies its own terminator.
+#define __OP_TYPE_SURFACE(__op) \
+    template <> struct __op_type_traits<__op> { using type = __surface_op_tag; }
+
 // Classes that implement specific texture ops.
 template <class __op> struct __tex_fetch_v4;
 
@@ -649,6 +722,283 @@ template <class __DestT, class __SrcT> struct __convert {
   }
 };
 
+// There are a couple of layers here.  First, __op_type_traits is used to dispatch to either the surface read/write
+// calls, or to the texture read calls.
+//
+// Then, that dispatches to __tex_fetch_impl below, which dispatches by both tag and datatype to the appropriate
+// __surf_read_write_v2 specialization (the read and the write variants are both generated below).
+// NOTE: any op ID not listed here keeps the default texture tag from __op_type_traits.
+
+// Mark which of the ids we should be dispatching to surface read/write calls.
+__OP_TYPE_SURFACE(__ID("__isurf1Dread"));
+__OP_TYPE_SURFACE(__ID("__isurf2Dread"));
+__OP_TYPE_SURFACE(__ID("__isurf3Dread"));
+__OP_TYPE_SURFACE(__ID("__isurf1DLayeredread"));
+__OP_TYPE_SURFACE(__ID("__isurf2DLayeredread"));
+__OP_TYPE_SURFACE(__ID("__isurfCubemapread"));
+__OP_TYPE_SURFACE(__ID("__isurfCubemapLayeredread"));
+__OP_TYPE_SURFACE(__ID("__isurf1Dwrite_v2"));
+__OP_TYPE_SURFACE(__ID("__isurf2Dwrite_v2"));
+__OP_TYPE_SURFACE(__ID("__isurf3Dwrite_v2"));
+__OP_TYPE_SURFACE(__ID("__isurf1DLayeredwrite_v2"));
+__OP_TYPE_SURFACE(__ID("__isurf2DLayeredwrite_v2"));
+__OP_TYPE_SURFACE(__ID("__isurfCubemapwrite_v2"));
+__OP_TYPE_SURFACE(__ID("__isurfCubemapLayeredwrite_v2"));
+
+template <class __op, typename __type>
+struct __surf_read_write_v2;
+
+// __SURF_WRITE_V2 defines the __surf_read_write_v2<__op, __type> specialization for one surface write: __run() switches
+// on the boundary mode and issues the matching "sust.b.<dim>.<type>.{zero,clamp,trap}" instruction via volatile asm.
+#define __SURF_WRITE_V2(__op, __asm_dim, __asmtype, __type, __index_op_args, __index_args, __index_asm_args,          \
+                        __asm_op_args, __asm_args)                                                                    \
+    template <>                                                                                                       \
+    struct __surf_read_write_v2<__op, __type> {                                                                       \
+        static __device__ void __run(__type *__ptr, cudaSurfaceObject_t obj, __L(__index_args),                       \
+                                     cudaSurfaceBoundaryMode mode) {                                                  \
+            switch (mode) {                                                                                           \
+                case cudaBoundaryModeZero:                                                                            \
+                    asm volatile("sust.b." __asm_dim "." __asmtype ".zero [%0, " __index_op_args "], " __asm_op_args  \
+                                 ";"                                                                                  \
+                                 :                                                                                    \
+                                 : "l"(obj), __L(__index_asm_args), __L(__asm_args));                                 \
+                    break;                                                                                            \
+                case cudaBoundaryModeClamp:                                                                           \
+                    asm volatile("sust.b." __asm_dim "." __asmtype ".clamp [%0, " __index_op_args "], " __asm_op_args \
+                                 ";"                                                                                  \
+                                 :                                                                                    \
+                                 : "l"(obj), __L(__index_asm_args), __L(__asm_args));                                 \
+                    break;                                                                                            \
+                case cudaBoundaryModeTrap:                                                                            \
+                    asm volatile("sust.b." __asm_dim "." __asmtype ".trap [%0, " __index_op_args "], " __asm_op_args  \
+                                 ";"                                                                                  \
+                                 :                                                                                    \
+                                 : "l"(obj), __L(__index_asm_args), __L(__asm_args));                                 \
+                    break;                                                                                            \
+            }                                                                                                         \
+        }                                                                                                             \
+    }
+// __SURF_READ_V2 is the read-side counterpart (suld.b).  The asm is volatile because surfaces are writable: a load must not be merged, hoisted or dropped across a store.
+#define __SURF_READ_V2(__op, __asm_dim, __asmtype, __type, __asm_op_args, __asm_args, __index_args, __index_asm_args) \
+    template <>                                                                                                       \
+    struct __surf_read_write_v2<__op, __type> {                                                                       \
+        static __device__ void __run(__type *__ptr, cudaSurfaceObject_t obj, __L(__index_args),                       \
+                                     cudaSurfaceBoundaryMode mode) {                                                  \
+            switch (mode) {                                                                                           \
+                case cudaBoundaryModeZero:                                                                            \
+                    asm volatile("suld.b." __asm_dim "." __asmtype ".zero " __asm_op_args ";"                         \
+                        : __L(__asm_args)                                                                             \
+                        : "l"(obj), __L(__index_asm_args));                                                           \
+                    break;                                                                                            \
+                case cudaBoundaryModeClamp:                                                                           \
+                    asm volatile("suld.b." __asm_dim "." __asmtype ".clamp " __asm_op_args ";"                        \
+                        : __L(__asm_args)                                                                             \
+                        : "l"(obj), __L(__index_asm_args));                                                           \
+                    break;                                                                                            \
+                case cudaBoundaryModeTrap:                                                                            \
+                    asm volatile("suld.b." __asm_dim "." __asmtype ".trap " __asm_op_args ";"                         \
+                        : __L(__asm_args)                                                                             \
+                        : "l"(obj), __L(__index_asm_args));                                                           \
+                    break;                                                                                            \
+            }                                                                                                         \
+        }                                                                                                             \
+    }
+
+// Operand-list helpers shared by the read and write macros.  Despite its name, the __type parameter receives the asm
+// constraint string (e.g. "=h" or "r"); it is applied to *__ptr for scalars or to each of x/y/z/w for vector types.
+
+#define __SW_ASM_ARGS(__type) (__type(*__ptr))
+#define __SW_ASM_ARGS1(__type) (__type(__ptr->x))
+#define __SW_ASM_ARGS2(__type) (__type(__ptr->x), __type(__ptr->y))
+#define __SW_ASM_ARGS4(__type) (__type(__ptr->x), __type(__ptr->y), __type(__ptr->z), __type(__ptr->w))
+// Per-geometry read wrappers: each fixes the op ID, the asm dimension string and the integer index parameters.
+#define __SURF_READ1D(__asmtype, __type, __asm_op_args, __asm_args) \
+    __SURF_READ_V2(__ID("__isurf1Dread"), "1d", __asmtype, __type, __asm_op_args, __asm_args, (int x), ("r"(x)))
+#define __SURF_READ2D(__asmtype, __type, __asm_op_args, __asm_args)                                           \
+    __SURF_READ_V2(__ID("__isurf2Dread"), "2d", __asmtype, __type, __asm_op_args, __asm_args, (int x, int y), \
+                   ("r"(x), "r"(y)))
+#define __SURF_READ3D(__asmtype, __type, __asm_op_args, __asm_args)                                                  \
+    __SURF_READ_V2(__ID("__isurf3Dread"), "3d", __asmtype, __type, __asm_op_args, __asm_args, (int x, int y, int z), \
+                   ("r"(x), "r"(y), "r"(z)))
+
+#define __SURF_READ1DLAYERED(__asmtype, __type, __asm_op_args, __asm_args)                            \
+    __SURF_READ_V2(__ID("__isurf1DLayeredread"), "a1d", __asmtype, __type, __asm_op_args, __asm_args, \
+                   (int x, int layer), ("r"(x), "r"(layer)))
+#define __SURF_READ2DLAYERED(__asmtype, __type, __asm_op_args, __asm_args)                            \
+    __SURF_READ_V2(__ID("__isurf2DLayeredread"), "a2d", __asmtype, __type, __asm_op_args, __asm_args, \
+                   (int x, int y, int layer), ("r"(x), "r"(y), "r"(layer)))
+#define __SURF_READCUBEMAP(__asmtype, __type, __asm_op_args, __asm_args)                            \
+    __SURF_READ_V2(__ID("__isurfCubemapread"), "a2d", __asmtype, __type, __asm_op_args, __asm_args, \
+                   (int x, int y, int face), ("r"(x), "r"(y), "r"(face)))
+#define __SURF_READCUBEMAPLAYERED(__asmtype, __type, __asm_op_args, __asm_args)                            \
+    __SURF_READ_V2(__ID("__isurfCubemapLayeredread"), "a2d", __asmtype, __type, __asm_op_args, __asm_args, \
+                   (int x, int y, int layerface), ("r"(x), "r"(y), "r"(layerface)))
+// Read operand templates: outputs first, then the surface handle, then the indices.  Layered/cubemap forms put the last index argument first; 3-index forms repeat the final operand to fill a 4-element vector.
+#define __1DV1 "{%0}, [%1, {%2}]"
+#define __1DV2 "{%0, %1}, [%2, {%3}]"
+#define __1DV4 "{%0, %1, %2, %3}, [%4, {%5}]"
+
+#define __2DV1 "{%0}, [%1, {%2, %3}]"
+#define __2DV2 "{%0, %1}, [%2, {%3, %4}]"
+#define __2DV4 "{%0, %1, %2, %3}, [%4, {%5, %6}]"
+
+#define __1DLAYERV1 "{%0}, [%1, {%3, %2}]"
+#define __1DLAYERV2 "{%0, %1}, [%2, {%4, %3}]"
+#define __1DLAYERV4 "{%0, %1, %2, %3}, [%4, {%6, %5}]"
+
+#define __3DV1 "{%0}, [%1, {%2, %3, %4, %4}]"
+#define __3DV2 "{%0, %1}, [%2, {%3, %4, %5, %5}]"
+#define __3DV4 "{%0, %1, %2, %4}, [%4, {%5, %6, %7, %7}]"
+
+#define __2DLAYERV1 "{%0}, [%1, {%4, %2, %3, %3}]"
+#define __2DLAYERV2 "{%0, %1}, [%2, {%5, %3, %4, %4}]"
+#define __2DLAYERV4 "{%0, %1, %2, %3}, [%4, {%7, %5, %6, %6}]"
+
+#define __CUBEMAPV1 "{%0}, [%1, {%4, %2, %3, %3}]"
+#define __CUBEMAPV2 "{%0, %1}, [%2, {%5, %3, %4, %4}]"
+#define __CUBEMAPV4 "{%0, %1, %2, %3}, [%4, {%7, %5, %6, %6}]"
+
+#define __CUBEMAPLAYERV1 "{%0}, [%1, {%4, %2, %3, %3}]"
+#define __CUBEMAPLAYERV2 "{%0, %1}, [%2, {%5, %3, %4, %4}]"
+#define __CUBEMAPLAYERV4 "{%0, %1, %2, %3}, [%4, {%7, %5, %6, %6}]"
+// Instantiates a read wrapper for every listed element type: scalars and 1-component vectors use __xdv1; 2- and 4-component vectors use __xdv2 and __xdv4.
+#define __SURF_READXD_ALL(__xdv1, __xdv2, __xdv4, __surf_readxd_v2)           \
+    __surf_readxd_v2("b8", char, __xdv1, __SW_ASM_ARGS("=h"));                \
+    __surf_readxd_v2("b8", signed char, __xdv1, __SW_ASM_ARGS("=h"));         \
+    __surf_readxd_v2("b8", char1, __xdv1, __SW_ASM_ARGS1("=h"));              \
+    __surf_readxd_v2("b8", unsigned char, __xdv1, __SW_ASM_ARGS("=h"));       \
+    __surf_readxd_v2("b8", uchar1, __xdv1, __SW_ASM_ARGS1("=h"));             \
+    __surf_readxd_v2("b16", short, __xdv1, __SW_ASM_ARGS("=h"));              \
+    __surf_readxd_v2("b16", short1, __xdv1, __SW_ASM_ARGS1("=h"));            \
+    __surf_readxd_v2("b16", unsigned short, __xdv1, __SW_ASM_ARGS("=h"));     \
+    __surf_readxd_v2("b16", ushort1, __xdv1, __SW_ASM_ARGS1("=h"));           \
+    __surf_readxd_v2("b32", int, __xdv1, __SW_ASM_ARGS("=r"));                \
+    __surf_readxd_v2("b32", int1, __xdv1, __SW_ASM_ARGS1("=r"));              \
+    __surf_readxd_v2("b32", unsigned int, __xdv1, __SW_ASM_ARGS("=r"));       \
+    __surf_readxd_v2("b32", uint1, __xdv1, __SW_ASM_ARGS1("=r"));             \
+    __surf_readxd_v2("b64", long long, __xdv1, __SW_ASM_ARGS("=l"));          \
+    __surf_readxd_v2("b64", longlong1, __xdv1, __SW_ASM_ARGS1("=l"));         \
+    __surf_readxd_v2("b64", unsigned long long, __xdv1, __SW_ASM_ARGS("=l")); \
+    __surf_readxd_v2("b64", ulonglong1, __xdv1, __SW_ASM_ARGS1("=l"));        \
+    __surf_readxd_v2("b32", float, __xdv1, __SW_ASM_ARGS("=r"));              \
+    __surf_readxd_v2("b32", float1, __xdv1, __SW_ASM_ARGS1("=r"));            \
+                                                                              \
+    __surf_readxd_v2("v2.b8", char2, __xdv2, __SW_ASM_ARGS2("=h"));           \
+    __surf_readxd_v2("v2.b8", uchar2, __xdv2, __SW_ASM_ARGS2("=h"));          \
+    __surf_readxd_v2("v2.b16", short2, __xdv2, __SW_ASM_ARGS2("=h"));         \
+    __surf_readxd_v2("v2.b16", ushort2, __xdv2, __SW_ASM_ARGS2("=h"));        \
+    __surf_readxd_v2("v2.b32", int2, __xdv2, __SW_ASM_ARGS2("=r"));           \
+    __surf_readxd_v2("v2.b32", uint2, __xdv2, __SW_ASM_ARGS2("=r"));          \
+    __surf_readxd_v2("v2.b64", longlong2, __xdv2, __SW_ASM_ARGS2("=l"));      \
+    __surf_readxd_v2("v2.b64", ulonglong2, __xdv2, __SW_ASM_ARGS2("=l"));     \
+    __surf_readxd_v2("v2.b32", float2, __xdv2, __SW_ASM_ARGS2("=r"));         \
+                                                                              \
+    __surf_readxd_v2("v4.b8", char4, __xdv4, __SW_ASM_ARGS4("=h"));           \
+    __surf_readxd_v2("v4.b8", uchar4, __xdv4, __SW_ASM_ARGS4("=h"));          \
+    __surf_readxd_v2("v4.b16", short4, __xdv4, __SW_ASM_ARGS4("=h"));         \
+    __surf_readxd_v2("v4.b16", ushort4, __xdv4, __SW_ASM_ARGS4("=h"));        \
+    __surf_readxd_v2("v4.b32", int4, __xdv4, __SW_ASM_ARGS4("=r"));           \
+    __surf_readxd_v2("v4.b32", uint4, __xdv4, __SW_ASM_ARGS4("=r"));          \
+    __surf_readxd_v2("v4.b32", float4, __xdv4, __SW_ASM_ARGS4("=r"))
+
+__SURF_READXD_ALL(__1DV1, __1DV2, __1DV4, __SURF_READ1D);
+__SURF_READXD_ALL(__2DV1, __2DV2, __2DV4, __SURF_READ2D);
+__SURF_READXD_ALL(__3DV1, __3DV2, __3DV4, __SURF_READ3D);
+__SURF_READXD_ALL(__1DLAYERV1, __1DLAYERV2, __1DLAYERV4, __SURF_READ1DLAYERED);
+__SURF_READXD_ALL(__2DLAYERV1, __2DLAYERV2, __2DLAYERV4, __SURF_READ2DLAYERED);
+__SURF_READXD_ALL(__CUBEMAPV1, __CUBEMAPV2, __CUBEMAPV4, __SURF_READCUBEMAP);
+__SURF_READXD_ALL(__CUBEMAPLAYERV1, __CUBEMAPLAYERV2, __CUBEMAPLAYERV4, __SURF_READCUBEMAPLAYERED);
+
+// Per-geometry write wrappers; the quoted "{...}" string is the index-operand list (%0 is the surface handle).
+#define __SURF_WRITE1D_V2(__asmtype, __type, __asm_op_args, __asm_args)                                     \
+    __SURF_WRITE_V2(__ID("__isurf1Dwrite_v2"), "1d", __asmtype, __type, "{%1}", (int x), ("r"(x)), __asm_op_args, \
+                    __asm_args)
+#define __SURF_WRITE1DLAYERED_V2(__asmtype, __type, __asm_op_args, __asm_args)                                  \
+    __SURF_WRITE_V2(__ID("__isurf1DLayeredwrite_v2"), "a1d", __asmtype, __type, "{%2, %1}", (int x, int layer), \
+                    ("r"(x), "r"(layer)), __asm_op_args, __asm_args)
+#define __SURF_WRITE2D_V2(__asmtype, __type, __asm_op_args, __asm_args)                                               \
+    __SURF_WRITE_V2(__ID("__isurf2Dwrite_v2"), "2d", __asmtype, __type, "{%1, %2}", (int x, int y), ("r"(x), "r"(y)), \
+                    __asm_op_args, __asm_args)
+#define __SURF_WRITE2DLAYERED_V2(__asmtype, __type, __asm_op_args, __asm_args)                      \
+    __SURF_WRITE_V2(__ID("__isurf2DLayeredwrite_v2"), "a2d", __asmtype, __type, "{%3, %1, %2, %2}", \
+                    (int x, int y, int layer), ("r"(x), "r"(y), "r"(layer)), __asm_op_args, __asm_args)
+#define __SURF_WRITE3D_V2(__asmtype, __type, __asm_op_args, __asm_args)                                            \
+    __SURF_WRITE_V2(__ID("__isurf3Dwrite_v2"), "3d", __asmtype, __type, "{%1, %2, %3, %3}", (int x, int y, int z), \
+                    ("r"(x), "r"(y), "r"(z)), __asm_op_args, __asm_args)
+
+#define __SURF_CUBEMAPWRITE_V2(__asmtype, __type, __asm_op_args, __asm_args)                      \
+    __SURF_WRITE_V2(__ID("__isurfCubemapwrite_v2"), "a2d", __asmtype, __type, "{%3, %1, %2, %2}", \
+                    (int x, int y, int face), ("r"(x), "r"(y), "r"(face)), __asm_op_args, __asm_args)
+#define __SURF_CUBEMAPLAYEREDWRITE_V2(__asmtype, __type, __asm_op_args, __asm_args)                      \
+    __SURF_WRITE_V2(__ID("__isurfCubemapLayeredwrite_v2"), "a2d", __asmtype, __type, "{%3, %1, %2, %2}", \
+                    (int x, int y, int layerface), ("r"(x), "r"(y), "r"(layerface)), __asm_op_args, __asm_args)
+// Instantiates a write wrapper per element type.  __1DV*/__2DV*/__3DV* are redefined further down as data-operand lists for 1, 2 and 3 index operands, so layered and cubemap writes reuse __2DV*/__3DV*.
+#define __SURF_WRITEXD_V2_ALL(__xdv1, __xdv2, __xdv4, __surf_writexd_v2)      \
+    __surf_writexd_v2("b8", char, __xdv1, __SW_ASM_ARGS("h"));                \
+    __surf_writexd_v2("b8", signed char, __xdv1, __SW_ASM_ARGS("h"));         \
+    __surf_writexd_v2("b8", char1, __xdv1, __SW_ASM_ARGS1("h"));              \
+    __surf_writexd_v2("b8", unsigned char, __xdv1, __SW_ASM_ARGS("h"));       \
+    __surf_writexd_v2("b8", uchar1, __xdv1, __SW_ASM_ARGS1("h"));             \
+    __surf_writexd_v2("b16", short, __xdv1, __SW_ASM_ARGS("h"));              \
+    __surf_writexd_v2("b16", short1, __xdv1, __SW_ASM_ARGS1("h"));            \
+    __surf_writexd_v2("b16", unsigned short, __xdv1, __SW_ASM_ARGS("h"));     \
+    __surf_writexd_v2("b16", ushort1, __xdv1, __SW_ASM_ARGS1("h"));           \
+    __surf_writexd_v2("b32", int, __xdv1, __SW_ASM_ARGS("r"));                \
+    __surf_writexd_v2("b32", int1, __xdv1, __SW_ASM_ARGS1("r"));              \
+    __surf_writexd_v2("b32", unsigned int, __xdv1, __SW_ASM_ARGS("r"));       \
+    __surf_writexd_v2("b32", uint1, __xdv1, __SW_ASM_ARGS1("r"));             \
+    __surf_writexd_v2("b64", long long, __xdv1, __SW_ASM_ARGS("l"));          \
+    __surf_writexd_v2("b64", longlong1, __xdv1, __SW_ASM_ARGS1("l"));         \
+    __surf_writexd_v2("b64", unsigned long long, __xdv1, __SW_ASM_ARGS("l")); \
+    __surf_writexd_v2("b64", ulonglong1, __xdv1, __SW_ASM_ARGS1("l"));        \
+    __surf_writexd_v2("b32", float, __xdv1, __SW_ASM_ARGS("r"));              \
+    __surf_writexd_v2("b32", float1, __xdv1, __SW_ASM_ARGS1("r"));            \
+                                                                              \
+    __surf_writexd_v2("v2.b8", char2, __xdv2, __SW_ASM_ARGS2("h"));           \
+    __surf_writexd_v2("v2.b8", uchar2, __xdv2, __SW_ASM_ARGS2("h"));          \
+    __surf_writexd_v2("v2.b16", short2, __xdv2, __SW_ASM_ARGS2("h"));         \
+    __surf_writexd_v2("v2.b16", ushort2, __xdv2, __SW_ASM_ARGS2("h"));        \
+    __surf_writexd_v2("v2.b32", int2, __xdv2, __SW_ASM_ARGS2("r"));           \
+    __surf_writexd_v2("v2.b32", uint2, __xdv2, __SW_ASM_ARGS2("r"));          \
+    __surf_writexd_v2("v2.b64", longlong2, __xdv2, __SW_ASM_ARGS2("l"));      \
+    __surf_writexd_v2("v2.b64", ulonglong2, __xdv2, __SW_ASM_ARGS2("l"));     \
+    __surf_writexd_v2("v2.b32", float2, __xdv2, __SW_ASM_ARGS2("r"));         \
+                                                                              \
+    __surf_writexd_v2("v4.b8", char4, __xdv4, __SW_ASM_ARGS4("h"));           \
+    __surf_writexd_v2("v4.b8", uchar4, __xdv4, __SW_ASM_ARGS4("h"));          \
+    __surf_writexd_v2("v4.b16", short4, __xdv4, __SW_ASM_ARGS4("h"));         \
+    __surf_writexd_v2("v4.b16", ushort4, __xdv4, __SW_ASM_ARGS4("h"));        \
+    __surf_writexd_v2("v4.b32", int4, __xdv4, __SW_ASM_ARGS4("r"));           \
+    __surf_writexd_v2("v4.b32", uint4, __xdv4, __SW_ASM_ARGS4("r"));          \
+    __surf_writexd_v2("v4.b32", float4, __xdv4, __SW_ASM_ARGS4("r"))
+
+#define __1DV1 "{%2}"
+#define __1DV2 "{%2, %3}"
+#define __1DV4 "{%2, %3, %4, %5}"
+
+#define __2DV1 "{%3}"
+#define __2DV2 "{%3, %4}"
+#define __2DV4 "{%3, %4, %5, %6}"
+
+#define __3DV1 "{%4}"
+#define __3DV2 "{%4, %5}"
+#define __3DV4 "{%4, %5, %6, %7}"
+
+__SURF_WRITEXD_V2_ALL(__1DV1, __1DV2, __1DV4, __SURF_WRITE1D_V2);
+__SURF_WRITEXD_V2_ALL(__2DV1, __2DV2, __2DV4, __SURF_WRITE2D_V2);
+__SURF_WRITEXD_V2_ALL(__3DV1, __3DV2, __3DV4, __SURF_WRITE3D_V2);
+__SURF_WRITEXD_V2_ALL(__2DV1, __2DV2, __2DV4, __SURF_WRITE1DLAYERED_V2);
+__SURF_WRITEXD_V2_ALL(__3DV1, __3DV2, __3DV4, __SURF_WRITE2DLAYERED_V2);
+__SURF_WRITEXD_V2_ALL(__3DV1, __3DV2, __3DV4, __SURF_CUBEMAPWRITE_V2);
+__SURF_WRITEXD_V2_ALL(__3DV1, __3DV2, __3DV4, __SURF_CUBEMAPLAYEREDWRITE_V2);
+// Surface overload of __tex_fetch_impl: forwards to the __surf_read_write_v2 specialization chosen by <__op, __DataT>.
+template <class __op, class __DataT, class... __Args>
+__device__ static void __tex_fetch_impl(__surface_op_tag, __DataT *__ptr, cudaSurfaceObject_t __handle,
+                                        __Args... __args) {
+    __surf_read_write_v2<__op, __DataT>::__run(__ptr, __handle, __args...);
+}
+
 // These are the top-level function overloads the __nv_tex_surf_handler expands
 // to.  Each overload deals with one of the several ways __nv_tex_surf_handler
 // is called by CUDA headers. In the end, each of the overloads does the same
@@ -659,13 +1009,20 @@ template <class __DestT, class __SrcT> struct __convert {
 // __nv_tex_surf_handler("__tex...", &ret, cudaTextureObject_t handle, args...);
 //   Data type and return type are based on ret.
 template <class __op, class __T, class... __Args>
-__device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle,
-                                   __Args... __args) {
+__device__ static void __tex_fetch_impl(__texture_op_tag, __T *__ptr, cudaTextureObject_t __handle,
+                                        __Args... __args) {
   using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
   *__ptr = __convert<__T, __FetchT>::__run(
       __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...));
 }
 
+template <class __op, class __T, class... __Args>
+__device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle,
+                                   __Args... __args) {
+  using __op_type = typename __op_type_traits<__op>::type; // texture or surface tag; reserved name avoids user-macro clashes
+  __tex_fetch_impl<__op>(__op_type{}, __ptr, __handle, __args...); // tag picks the texture or the surface overload
+}
+
 #if CUDA_VERSION < 12000
 // texture<> objects get magically converted into a texture reference.  However,
 // there's no way to convert them to cudaTextureObject_t on C++ level. So, we
@@ -722,6 +1079,7 @@ __tex_fetch(__DataT *, __RetT *__ptr,
 #pragma pop_macro("__Args")
 #pragma pop_macro("__ID")
 #pragma pop_macro("__IDV")
+#pragma pop_macro("__OP_TYPE_SURFACE")
 #pragma pop_macro("__IMPL_2DGATHER")
 #pragma pop_macro("__IMPL_ALIAS")
 #pragma pop_macro("__IMPL_ALIASI")
@@ -739,4 +1097,61 @@ __tex_fetch(__DataT *, __RetT *__ptr,
 #pragma pop_macro("__IMPL_SI")
 #pragma pop_macro("__L")
 #pragma pop_macro("__STRIP_PARENS")
+#pragma pop_macro("__SURF_WRITE_V2")
+#pragma pop_macro("__SW_ASM_ARGS")
+#pragma pop_macro("__SW_ASM_ARGS1")
+#pragma pop_macro("__SW_ASM_ARGS2")
+#pragma pop_macro("__SW_ASM_ARGS4")
+#pragma pop_macro("__SURF_WRITE_V2")
+#pragma pop_macro("__SURF_READ_V2")
+#pragma pop_macro("__SW_ASM_ARGS")
+#pragma pop_macro("__SW_ASM_ARGS1")
+#pragma pop_macro("__SW_ASM_ARGS2")
+#pragma pop_macro("__SW_ASM_ARGS4")
+#pragma pop_macro("__SURF_READ1D")
+#pragma pop_macro("__SURF_READ2D")
+#pragma pop_macro("__SURF_READ3D")
+#pragma pop_macro("__SURF_READ1DLAYERED")
+#pragma pop_macro("__SURF_READ2DLAYERED")
+#pragma pop_macro("__SURF_READCUBEMAP")
+#pragma pop_macro("__SURF_READCUBEMAPLAYERED")
+#pragma pop_macro("__1DV1")
+#pragma pop_macro("__1DV2")
+#pragma pop_macro("__1DV4")
+#pragma pop_macro("__2DV1")
+#pragma pop_macro("__2DV2")
+#pragma pop_macro("__2DV4")
+#pragma pop_macro("__1DLAYERV1")
+#pragma pop_macro("__1DLAYERV2")
+#pragma pop_macro("__1DLAYERV4")
+#pragma pop_macro("__3DV1")
+#pragma pop_macro("__3DV2")
+#pragma pop_macro("__3DV4")
+#pragma pop_macro("__2DLAYERV1")
+#pragma pop_macro("__2DLAYERV2")
+#pragma pop_macro("__2DLAYERV4")
+#pragma pop_macro("__CUBEMAPV1")
+#pragma pop_macro("__CUBEMAPV2")
+#pragma pop_macro("__CUBEMAPV4")
+#pragma pop_macro("__CUBEMAPLAYERV1")
+#pragma pop_macro("__CUBEMAPLAYERV2")
+#pragma pop_macro("__CUBEMAPLAYERV4")
+#pragma pop_macro("__SURF_READXD_ALL")
+#pragma pop_macro("__SURF_WRITE1D_V2")
+#pragma pop_macro("__SURF_WRITE1DLAYERED_V2")
+#pragma pop_macro("__SURF_WRITE2D_V2")
+#pragma pop_macro("__SURF_WRITE2DLAYERED_V2")
+#pragma pop_macro("__SURF_WRITE3D_V2")
+#pragma pop_macro("__SURF_CUBEMAPWRITE_V2")
+#pragma pop_macro("__SURF_CUBEMAPLAYEREDWRITE_V2")
+#pragma pop_macro("__SURF_WRITEXD_V2_ALL")
+#pragma pop_macro("__1DV1")
+#pragma pop_macro("__1DV2")
+#pragma pop_macro("__1DV4")
+#pragma pop_macro("__2DV1")
+#pragma pop_macro("__2DV2")
+#pragma pop_macro("__2DV4")
+#pragma pop_macro("__3DV1")
+#pragma pop_macro("__3DV2")
+#pragma pop_macro("__3DV4")
 #endif // __CLANG_CUDA_TEXTURE_INTRINSICS_H__

>From dc5fd8f7a3a5e55bdb15769bd16242903e15354c Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Tue, 25 Mar 2025 15:51:23 -0700
Subject: [PATCH 2/7] Hopefully fix test failures, and fix register mapping
 issue

---
 clang/lib/Headers/__clang_cuda_texture_intrinsics.h | 2 +-
 clang/test/Headers/Inputs/include/cuda.h            | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
index 2ea83f66036d4..618ac70eefe99 100644
--- a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
@@ -849,7 +849,7 @@ struct __surf_read_write_v2;
 
 #define __3DV1 "{%0}, [%1, {%2, %3, %4, %4}]"
 #define __3DV2 "{%0, %1}, [%2, {%3, %4, %5, %5}]"
-#define __3DV4 "{%0, %1, %2, %4}, [%4, {%5, %6, %7, %7}]"
+#define __3DV4 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}]"
 
 #define __2DLAYERV1 "{%0}, [%1, {%4, %2, %3, %3}]"
 #define __2DLAYERV2 "{%0, %1}, [%2, {%5, %3, %4, %4}]"
diff --git a/clang/test/Headers/Inputs/include/cuda.h b/clang/test/Headers/Inputs/include/cuda.h
index d84029759c165..d1c9f32a45fc1 100644
--- a/clang/test/Headers/Inputs/include/cuda.h
+++ b/clang/test/Headers/Inputs/include/cuda.h
@@ -120,12 +120,19 @@ struct double4 {
 };
 
 typedef unsigned long long cudaTextureObject_t;
+typedef unsigned long long cudaSurfaceObject_t;
 
 enum cudaTextureReadMode {
   cudaReadModeNormalizedFloat,
   cudaReadModeElementType
 };
 
+enum cudaSurfaceBoundaryMode { // selects the .zero/.clamp/.trap variant of the surface asm in the intrinsics header
+  cudaBoundaryModeZero,
+  cudaBoundaryModeClamp,
+  cudaBoundaryModeTrap
+};
+
 enum {
   cudaTextureType1D,
   cudaTextureType2D,

>From 3be9f51f85eafce10ead22abe143eb173a4afd20 Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Tue, 25 Mar 2025 16:40:45 -0700
Subject: [PATCH 3/7] Add 1d variants

---
 clang/test/Headers/Inputs/include/cuda.h | 40 ++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/clang/test/Headers/Inputs/include/cuda.h b/clang/test/Headers/Inputs/include/cuda.h
index d1c9f32a45fc1..40a00b5af295a 100644
--- a/clang/test/Headers/Inputs/include/cuda.h
+++ b/clang/test/Headers/Inputs/include/cuda.h
@@ -25,6 +25,10 @@ __device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; }
 
 #define CUDA_VERSION 10100
 
+struct char1 { // One-component sibling of char2/char4 in this test stub; x defaults to 0.
+  char x;
+  __host__ __device__ char1(char x = 0) : x(x) {}
+};
 struct char2 {
   char x, y;
   __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
@@ -34,6 +38,10 @@ struct char4 {
   __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct uchar1 { // One-component sibling of uchar2/uchar4 in this test stub; x defaults to 0.
+  unsigned char x;
+  __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
+};
 struct uchar2 {
   unsigned char x, y;
   __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
@@ -43,6 +51,10 @@ struct uchar4 {
   __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct short1 { // One-component sibling of short2/short4 in this test stub; x defaults to 0.
+  short x;
+  __host__ __device__ short1(short x = 0) : x(x) {}
+};
 struct short2 {
   short x, y;
   __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
@@ -52,6 +64,10 @@ struct short4 {
   __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct ushort1 { // One-component sibling of ushort2/ushort4 in this test stub; x defaults to 0.
+  unsigned short x;
+  __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
+};
 struct ushort2 {
   unsigned short x, y;
   __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
@@ -61,6 +77,10 @@ struct ushort4 {
   __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct int1 { // One-component sibling of int2/int4 in this test stub; x defaults to 0.
+  int x;
+  __host__ __device__ int1(int x = 0) : x(x) {}
+};
 struct int2 {
   int x, y;
   __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
@@ -70,6 +90,10 @@ struct int4 {
   __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct uint1 { // One-component sibling of uint2/uint4 in this test stub; x defaults to 0.
+  unsigned x;
+  __host__ __device__ uint1(unsigned x = 0) : x(x) {}
+};
 struct uint2 {
   unsigned x, y;
   __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
@@ -83,6 +107,10 @@ struct uint4 {
   __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct longlong1 { // One-component sibling of longlong2/longlong4 in this test stub; x defaults to 0.
+  long long x;
+  __host__ __device__ longlong1(long long x = 0) : x(x) {}
+};
 struct longlong2 {
   long long x, y;
   __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
@@ -92,6 +120,10 @@ struct longlong4 {
   __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct ulonglong1 { // One-component sibling of ulonglong2/ulonglong4 in this test stub; x defaults to 0.
+  unsigned long long x;
+  __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
+};
 struct ulonglong2 {
   unsigned long long x, y;
   __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
@@ -101,6 +133,10 @@ struct ulonglong4 {
   __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct float1 { // One-component sibling of float2/float4 in this test stub; x defaults to 0.
+  float x;
+  __host__ __device__ float1(float x = 0) : x(x) {}
+};
 struct float2 {
   float x, y;
   __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
@@ -110,6 +146,10 @@ struct float4 {
   __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
 };
 
+struct double1 { // One-component sibling of double2/double4 in this test stub; x defaults to 0.
+  double x;
+  __host__ __device__ double1(double x = 0) : x(x) {}
+};
 struct double2 {
   double x, y;
   __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}

>From 0d2ea2e4454a7f81fddb02c181e9459c1da508e7 Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Tue, 25 Mar 2025 16:52:43 -0700
Subject: [PATCH 4/7] Fix missing test header.

---
 clang/test/Headers/Inputs/include/surface_indirect_functions.h | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 clang/test/Headers/Inputs/include/surface_indirect_functions.h

diff --git a/clang/test/Headers/Inputs/include/surface_indirect_functions.h b/clang/test/Headers/Inputs/include/surface_indirect_functions.h
new file mode 100644
index 0000000000000..bffa775cb2822
--- /dev/null
+++ b/clang/test/Headers/Inputs/include/surface_indirect_functions.h
@@ -0,0 +1,2 @@
+// required for __clang_cuda_runtime_wrapper.h tests
+#pragma once

>From e61144bc8392b6bbf8a6ac8579638a3606220981 Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Sat, 29 Mar 2025 19:22:06 -0700
Subject: [PATCH 5/7] Test surfaces

---
 clang/test/CodeGen/nvptx-surface.cu | 3329 +++++++++++++++++++++++++++
 1 file changed, 3329 insertions(+)
 create mode 100644 clang/test/CodeGen/nvptx-surface.cu

diff --git a/clang/test/CodeGen/nvptx-surface.cu b/clang/test/CodeGen/nvptx-surface.cu
new file mode 100644
index 0000000000000..7c42e5d118153
--- /dev/null
+++ b/clang/test/CodeGen/nvptx-surface.cu
@@ -0,0 +1,3329 @@
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -O3 -o - %s -emit-llvm | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o - %s -emit-llvm | FileCheck %s
+#include "../Headers/Inputs/include/cuda.h"
+
+#include "__clang_cuda_texture_intrinsics.h"
+
+__device__ void surfchar(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  char val; // Plain char element, passed by address to every handler call; the expected asm uses 8-bit (.b8) accesses.
+  // 1D: one read and one write per boundary mode (zero, clamp, trap).
+  // CHECK: %0 = tail call i8 asm "suld.b.1d.b8.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i8 asm "suld.b.1d.b8.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i8 asm "suld.b.1d.b8.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: two coordinate operands, in the order they are passed (x, y).
+  // CHECK: %3 = tail call i8 asm "suld.b.2d.b8.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i8 asm "suld.b.2d.b8.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i8 asm "suld.b.2d.b8.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: the expected coordinate vector has four slots; the last coordinate operand is repeated to fill it.
+  // CHECK: %6 = tail call i8 asm "suld.b.3d.b8.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i8 asm "suld.b.3d.b8.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i8 asm "suld.b.3d.b8.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (.a1d): the two coordinate operands appear swapped in the expected asm, presumably layer before x.
+  // CHECK: %9 = tail call i8 asm "suld.b.a1d.b8.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i8 asm "suld.b.a1d.b8.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i8 asm "suld.b.a1d.b8.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (.a2d): the last coordinate operand moves to the front and the one before it is repeated in the fourth slot.
+  // CHECK: %12 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: expected to produce the same .a2d asm as the 2D layered calls, with face passed where layer was.
+  // CHECK: %15 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap: again the same .a2d asm, with layerface passed where layer was.
+  // CHECK: %18 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfsignedchar(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  signed char val; // signed char element, passed by address to every handler call; the expected asm uses 8-bit (.b8) accesses.
+  // 1D surface: a read followed by a write for each of the zero, clamp and trap boundary modes.
+  // CHECK: %0 = tail call i8 asm "suld.b.1d.b8.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i8 asm "suld.b.1d.b8.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i8 asm "suld.b.1d.b8.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D surface: coordinate operands keep the order in which they are passed.
+  // CHECK: %3 = tail call i8 asm "suld.b.2d.b8.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i8 asm "suld.b.2d.b8.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i8 asm "suld.b.2d.b8.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D surface: four-slot coordinate vector in the expected asm, with the last coordinate operand repeated.
+  // CHECK: %6 = tail call i8 asm "suld.b.3d.b8.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i8 asm "suld.b.3d.b8.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i8 asm "suld.b.3d.b8.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered surface (.a1d): coordinate operands are swapped in the expected asm, presumably layer before x.
+  // CHECK: %9 = tail call i8 asm "suld.b.a1d.b8.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i8 asm "suld.b.a1d.b8.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i8 asm "suld.b.a1d.b8.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered surface (.a2d): last coordinate operand comes first; the one before it fills the fourth slot too.
+  // CHECK: %12 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap surface: expected asm matches the 2D layered (.a2d) form, with face passed in place of layer.
+  // CHECK: %15 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap surface: same .a2d form once more, with layerface passed in place of layer.
+  // CHECK: %18 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfchar1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  char1 val; // One-component char vector, passed by address to every handler call; the expected asm is the scalar 8-bit (.b8) form.
+  // 1D accesses: read then write, once per boundary mode (zero, clamp, trap).
+  // CHECK: %0 = tail call i8 asm "suld.b.1d.b8.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i8 asm "suld.b.1d.b8.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i8 asm "suld.b.1d.b8.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D accesses: the two coordinate operands appear in call order.
+  // CHECK: %3 = tail call i8 asm "suld.b.2d.b8.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i8 asm "suld.b.2d.b8.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i8 asm "suld.b.2d.b8.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D accesses: the expected asm pads the coordinate vector to four entries by repeating the last operand.
+  // CHECK: %6 = tail call i8 asm "suld.b.3d.b8.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i8 asm "suld.b.3d.b8.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i8 asm "suld.b.3d.b8.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered accesses (.a1d): coordinate operands are reversed in the expected asm, presumably layer before x.
+  // CHECK: %9 = tail call i8 asm "suld.b.a1d.b8.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i8 asm "suld.b.a1d.b8.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i8 asm "suld.b.a1d.b8.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered accesses (.a2d): the final coordinate operand leads, and its predecessor is repeated as padding.
+  // CHECK: %12 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap accesses: expected to yield the same .a2d asm as 2D layered, with face supplied instead of layer.
+  // CHECK: %15 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap accesses: the same .a2d asm again, with layerface supplied instead of layer.
+  // CHECK: %18 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfunsignedchar(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  unsigned char val; // passed by address to every handler call below
+  // Calls each surface read/write handler with this type for Zero, Clamp and Trap; expected asm is the .b8 form. 1D (x) first.
+  // CHECK: %0 = tail call i8 asm "suld.b.1d.b8.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i8 asm "suld.b.1d.b8.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i8 asm "suld.b.1d.b8.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D (x, y).
+  // CHECK: %3 = tail call i8 asm "suld.b.2d.b8.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i8 asm "suld.b.2d.b8.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i8 asm "suld.b.2d.b8.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D (x, y, z); the expected coordinate list has four entries, with the last operand repeated.
+  // CHECK: %6 = tail call i8 asm "suld.b.3d.b8.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i8 asm "suld.b.3d.b8.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i8 asm "suld.b.3d.b8.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (x, layer); the expected coordinate list names the later operand first.
+  // CHECK: %9 = tail call i8 asm "suld.b.a1d.b8.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i8 asm "suld.b.a1d.b8.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i8 asm "suld.b.a1d.b8.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (x, y, layer); the expected list is {third, first, second, second} in operand order.
+  // CHECK: %12 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap (x, y, face); expects the same .a2d form as the 2D layered calls.
+  // CHECK: %15 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap (x, y, layerface); again expects the .a2d form.
+  // CHECK: %18 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+} // NOTE(review): expected lines pin value numbers %0..%20 in call order - presumably must be renumbered if calls move
+
+__device__ void surfuchar1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  uchar1 val; // passed by address to every handler call below
+  // Calls each surface read/write handler with uchar1 for Zero, Clamp and Trap; expects an i8 result and .b8 asm. 1D (x) first.
+  // CHECK: %0 = tail call i8 asm "suld.b.1d.b8.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i8 asm "suld.b.1d.b8.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i8 asm "suld.b.1d.b8.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b8.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D (x, y).
+  // CHECK: %3 = tail call i8 asm "suld.b.2d.b8.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i8 asm "suld.b.2d.b8.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i8 asm "suld.b.2d.b8.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b8.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D (x, y, z); the expected coordinate list has four entries, with the last operand repeated.
+  // CHECK: %6 = tail call i8 asm "suld.b.3d.b8.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i8 asm "suld.b.3d.b8.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i8 asm "suld.b.3d.b8.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b8.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (x, layer); the expected coordinate list names the later operand first.
+  // CHECK: %9 = tail call i8 asm "suld.b.a1d.b8.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i8 asm "suld.b.a1d.b8.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i8 asm "suld.b.a1d.b8.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b8.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (x, y, layer); the expected list is {third, first, second, second} in operand order.
+  // CHECK: %12 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap (x, y, face); expects the same .a2d form as the 2D layered calls.
+  // CHECK: %15 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap (x, y, layerface); again expects the .a2d form.
+  // CHECK: %18 = tail call i8 asm "suld.b.a2d.b8.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i8 asm "suld.b.a2d.b8.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i8 asm "suld.b.a2d.b8.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b8.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+} // NOTE(review): expected lines pin value numbers %0..%20 in call order - presumably must be renumbered if calls move
+
+__device__ void surfshort(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  short val; // passed by address to every handler call below
+  // Calls each surface read/write handler with short for Zero, Clamp and Trap; expects an i16 result and .b16 asm. 1D (x) first.
+  // CHECK: %0 = tail call i16 asm "suld.b.1d.b16.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i16 asm "suld.b.1d.b16.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i16 asm "suld.b.1d.b16.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D (x, y).
+  // CHECK: %3 = tail call i16 asm "suld.b.2d.b16.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i16 asm "suld.b.2d.b16.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i16 asm "suld.b.2d.b16.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D (x, y, z); the expected coordinate list has four entries, with the last operand repeated.
+  // CHECK: %6 = tail call i16 asm "suld.b.3d.b16.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i16 asm "suld.b.3d.b16.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i16 asm "suld.b.3d.b16.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (x, layer); the expected coordinate list names the later operand first.
+  // CHECK: %9 = tail call i16 asm "suld.b.a1d.b16.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i16 asm "suld.b.a1d.b16.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i16 asm "suld.b.a1d.b16.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (x, y, layer); the expected list is {third, first, second, second} in operand order.
+  // CHECK: %12 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap (x, y, face); expects the same .a2d form as the 2D layered calls.
+  // CHECK: %15 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap (x, y, layerface); again expects the .a2d form.
+  // CHECK: %18 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+} // NOTE(review): expected lines pin value numbers %0..%20 in call order - presumably must be renumbered if calls move
+
+__device__ void surfshort1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  short1 val; // passed by address to every handler call below
+  // Calls each surface read/write handler with short1 for Zero, Clamp and Trap; expects an i16 result and .b16 asm. 1D (x) first.
+  // CHECK: %0 = tail call i16 asm "suld.b.1d.b16.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i16 asm "suld.b.1d.b16.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i16 asm "suld.b.1d.b16.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D (x, y).
+  // CHECK: %3 = tail call i16 asm "suld.b.2d.b16.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i16 asm "suld.b.2d.b16.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i16 asm "suld.b.2d.b16.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D (x, y, z); the expected coordinate list has four entries, with the last operand repeated.
+  // CHECK: %6 = tail call i16 asm "suld.b.3d.b16.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i16 asm "suld.b.3d.b16.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i16 asm "suld.b.3d.b16.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (x, layer); the expected coordinate list names the later operand first.
+  // CHECK: %9 = tail call i16 asm "suld.b.a1d.b16.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i16 asm "suld.b.a1d.b16.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i16 asm "suld.b.a1d.b16.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (x, y, layer); the expected list is {third, first, second, second} in operand order.
+  // CHECK: %12 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap (x, y, face); expects the same .a2d form as the 2D layered calls.
+  // CHECK: %15 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap (x, y, layerface); again expects the .a2d form.
+  // CHECK: %18 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+} // NOTE(review): expected lines pin value numbers %0..%20 in call order - presumably must be renumbered if calls move
+
+__device__ void surfunsignedshort(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  unsigned short val;  // element type under test; the expectations below use i16 / .b16 / "h" for it
+  // Every handler is called as read/write pairs for the zero, clamp and trap boundary modes (in that order); the FileCheck expectations pin the inline PTX for each call. 1D group (x only):
+  // CHECK: %0 = tail call i16 asm "suld.b.1d.b16.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i16 asm "suld.b.1d.b16.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i16 asm "suld.b.1d.b16.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D group (x, y).
+  // CHECK: %3 = tail call i16 asm "suld.b.2d.b16.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i16 asm "suld.b.2d.b16.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i16 asm "suld.b.2d.b16.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D group (x, y, z); the expected asm lists the last coordinate operand twice.
+  // CHECK: %6 = tail call i16 asm "suld.b.3d.b16.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i16 asm "suld.b.3d.b16.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i16 asm "suld.b.3d.b16.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered group (x, layer); the expected asm lists the two coordinate operands in reverse order (presumably layer first).
+  // CHECK: %9 = tail call i16 asm "suld.b.a1d.b16.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i16 asm "suld.b.a1d.b16.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i16 asm "suld.b.a1d.b16.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered group (x, y, layer); expected as the .a2d form: last coordinate operand first, then the other two with the final one repeated.
+  // CHECK: %12 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap group (x, y, face); the expected asm is the same .a2d form as the 2D layered group.
+  // CHECK: %15 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered group (x, y, layerface); again expected as the .a2d form.
+  // CHECK: %18 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfushort1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  ushort1 val;  // element type under test; the expectations below use i16 / .b16 / "h" for it
+  // Every handler is called as read/write pairs for the zero, clamp and trap boundary modes (in that order); the FileCheck expectations pin the inline PTX for each call. 1D group (x only):
+  // CHECK: %0 = tail call i16 asm "suld.b.1d.b16.zero {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.zero [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i16 asm "suld.b.1d.b16.clamp {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.clamp [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i16 asm "suld.b.1d.b16.trap {$0}, [$1, {$2}];", "=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b16.trap [$0, {$1}], {$2};", "l,r,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D group (x, y).
+  // CHECK: %3 = tail call i16 asm "suld.b.2d.b16.zero {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.zero [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i16 asm "suld.b.2d.b16.clamp {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.clamp [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i16 asm "suld.b.2d.b16.trap {$0}, [$1, {$2, $3}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b16.trap [$0, {$1, $2}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D group (x, y, z); the expected asm lists the last coordinate operand twice.
+  // CHECK: %6 = tail call i16 asm "suld.b.3d.b16.zero {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i16 asm "suld.b.3d.b16.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i16 asm "suld.b.3d.b16.trap {$0}, [$1, {$2, $3, $4, $4}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b16.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered group (x, layer); the expected asm lists the two coordinate operands in reverse order (presumably layer first).
+  // CHECK: %9 = tail call i16 asm "suld.b.a1d.b16.zero {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.zero [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i16 asm "suld.b.a1d.b16.clamp {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.clamp [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i16 asm "suld.b.a1d.b16.trap {$0}, [$1, {$3, $2}];", "=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b16.trap [$0, {$2, $1}], {$3};", "l,r,r,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered group (x, y, layer); expected as the .a2d form: last coordinate operand first, then the other two with the final one repeated.
+  // CHECK: %12 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap group (x, y, face); the expected asm is the same .a2d form as the 2D layered group.
+  // CHECK: %15 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered group (x, y, layerface); again expected as the .a2d form.
+  // CHECK: %18 = tail call i16 asm "suld.b.a2d.b16.zero {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i16 asm "suld.b.a2d.b16.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i16 asm "suld.b.a2d.b16.trap {$0}, [$1, {$4, $2, $3, $3}];", "=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b16.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfint(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  int val;  // element type under test; the expectations below use i32 / .b32 / "r" for it
+  // Every handler is called as read/write pairs for the zero, clamp and trap boundary modes (in that order); the FileCheck expectations pin the inline PTX for each call. 1D group (x only):
+  // CHECK: %0 = tail call i32 asm "suld.b.1d.b32.zero {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.zero [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i32 asm "suld.b.1d.b32.clamp {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.clamp [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i32 asm "suld.b.1d.b32.trap {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.trap [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D group (x, y).
+  // CHECK: %3 = tail call i32 asm "suld.b.2d.b32.zero {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.zero [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i32 asm "suld.b.2d.b32.clamp {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.clamp [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i32 asm "suld.b.2d.b32.trap {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.trap [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D group (x, y, z); the expected asm lists the last coordinate operand twice.
+  // CHECK: %6 = tail call i32 asm "suld.b.3d.b32.zero {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i32 asm "suld.b.3d.b32.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i32 asm "suld.b.3d.b32.trap {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered group (x, layer); the expected asm lists the two coordinate operands in reverse order (presumably layer first).
+  // CHECK: %9 = tail call i32 asm "suld.b.a1d.b32.zero {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.zero [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i32 asm "suld.b.a1d.b32.clamp {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.clamp [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i32 asm "suld.b.a1d.b32.trap {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.trap [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered group (x, y, layer); expected as the .a2d form: last coordinate operand first, then the other two with the final one repeated.
+  // CHECK: %12 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap group (x, y, face); the expected asm is the same .a2d form as the 2D layered group.
+  // CHECK: %15 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered group (x, y, layerface); again expected as the .a2d form.
+  // CHECK: %18 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfint1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  int1 val;  // element type under test; the expectations below use i32 / .b32 / "r" for it
+  // Every handler is called as read/write pairs for the zero, clamp and trap boundary modes (in that order); the FileCheck expectations pin the inline PTX for each call. 1D group (x only):
+  // CHECK: %0 = tail call i32 asm "suld.b.1d.b32.zero {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.zero [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i32 asm "suld.b.1d.b32.clamp {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.clamp [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i32 asm "suld.b.1d.b32.trap {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.trap [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D group (x, y).
+  // CHECK: %3 = tail call i32 asm "suld.b.2d.b32.zero {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.zero [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i32 asm "suld.b.2d.b32.clamp {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.clamp [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i32 asm "suld.b.2d.b32.trap {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.trap [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D group (x, y, z); the expected asm lists the last coordinate operand twice.
+  // CHECK: %6 = tail call i32 asm "suld.b.3d.b32.zero {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i32 asm "suld.b.3d.b32.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i32 asm "suld.b.3d.b32.trap {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered group (x, layer); the expected asm lists the two coordinate operands in reverse order (presumably layer first).
+  // CHECK: %9 = tail call i32 asm "suld.b.a1d.b32.zero {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.zero [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i32 asm "suld.b.a1d.b32.clamp {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.clamp [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i32 asm "suld.b.a1d.b32.trap {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.trap [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered group (x, y, layer); expected as the .a2d form: last coordinate operand first, then the other two with the final one repeated.
+  // CHECK: %12 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap group (x, y, face); the expected asm is the same .a2d form as the 2D layered group.
+  // CHECK: %15 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered group (x, y, layerface); again expected as the .a2d form.
+  // CHECK: %18 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfunsignedint(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  unsigned int val;  // passed by address to every handler call below; the expectations use the 32-bit form (.b32, i32 result)
+  // Issues each __isurf* read and write handler once per boundary mode (Zero, Clamp, Trap). The directive above each call is the inline asm expected for it; read results are named %0..%20 in call order, so the statement order must not change. First group: 1D surface (x).
+  // CHECK: %0 = tail call i32 asm "suld.b.1d.b32.zero {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.zero [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i32 asm "suld.b.1d.b32.clamp {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.clamp [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i32 asm "suld.b.1d.b32.trap {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.trap [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D surface: coordinates x, y.
+  // CHECK: %3 = tail call i32 asm "suld.b.2d.b32.zero {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.zero [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i32 asm "suld.b.2d.b32.clamp {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.clamp [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i32 asm "suld.b.2d.b32.trap {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.trap [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D surface: x, y, z. The expected asm uses a four-element coordinate vector whose last element repeats the third coordinate operand.
+  // CHECK: %6 = tail call i32 asm "suld.b.3d.b32.zero {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i32 asm "suld.b.3d.b32.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i32 asm "suld.b.3d.b32.trap {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered surface: x, layer. The expected asm lists the second coordinate operand before the first.
+  // CHECK: %9 = tail call i32 asm "suld.b.a1d.b32.zero {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.zero [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i32 asm "suld.b.a1d.b32.clamp {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.clamp [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i32 asm "suld.b.a1d.b32.trap {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.trap [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered surface: x, y, layer. The expected asm lists the third coordinate operand first, then the first and second, with the second repeated as the fourth element.
+  // CHECK: %12 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap surface: x, y, face. The expectations use the same .a2d asm form as the 2D layered group.
+  // CHECK: %15 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap surface: x, y, layerface. The expectations again use the .a2d asm form.
+  // CHECK: %18 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfuint1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  uint1 val;  // passed by address to every handler call below; the expectations use the 32-bit form (.b32, i32 result)
+  // Issues each __isurf* read and write handler once per boundary mode (Zero, Clamp, Trap). The directive above each call is the inline asm expected for it; read results are named %0..%20 in call order, so the statement order must not change. First group: 1D surface (x).
+  // CHECK: %0 = tail call i32 asm "suld.b.1d.b32.zero {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.zero [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i32 asm "suld.b.1d.b32.clamp {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.clamp [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i32 asm "suld.b.1d.b32.trap {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.trap [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D surface: coordinates x, y.
+  // CHECK: %3 = tail call i32 asm "suld.b.2d.b32.zero {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.zero [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i32 asm "suld.b.2d.b32.clamp {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.clamp [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i32 asm "suld.b.2d.b32.trap {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.trap [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D surface: x, y, z. The expected asm uses a four-element coordinate vector whose last element repeats the third coordinate operand.
+  // CHECK: %6 = tail call i32 asm "suld.b.3d.b32.zero {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i32 asm "suld.b.3d.b32.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i32 asm "suld.b.3d.b32.trap {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered surface: x, layer. The expected asm lists the second coordinate operand before the first.
+  // CHECK: %9 = tail call i32 asm "suld.b.a1d.b32.zero {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.zero [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i32 asm "suld.b.a1d.b32.clamp {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.clamp [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i32 asm "suld.b.a1d.b32.trap {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.trap [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered surface: x, y, layer. The expected asm lists the third coordinate operand first, then the first and second, with the second repeated as the fourth element.
+  // CHECK: %12 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap surface: x, y, face. The expectations use the same .a2d asm form as the 2D layered group.
+  // CHECK: %15 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap surface: x, y, layerface. The expectations again use the .a2d asm form.
+  // CHECK: %18 = tail call i32 asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i32 asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i32 asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surflonglong(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  long long val;  // passed by address to every handler call below; the expectations use the 64-bit form (.b64, i64 result, "l" constraint for the value)
+  // Issues each __isurf* read and write handler once per boundary mode (Zero, Clamp, Trap). The directive above each call is the inline asm expected for it; read results are named %0..%20 in call order, so the statement order must not change. First group: 1D surface (x).
+  // CHECK: %0 = tail call i64 asm "suld.b.1d.b64.zero {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.zero [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i64 asm "suld.b.1d.b64.clamp {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.clamp [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i64 asm "suld.b.1d.b64.trap {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.trap [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D surface: coordinates x, y.
+  // CHECK: %3 = tail call i64 asm "suld.b.2d.b64.zero {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.zero [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i64 asm "suld.b.2d.b64.clamp {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.clamp [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i64 asm "suld.b.2d.b64.trap {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.trap [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D surface: x, y, z. The expected asm uses a four-element coordinate vector whose last element repeats the third coordinate operand.
+  // CHECK: %6 = tail call i64 asm "suld.b.3d.b64.zero {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i64 asm "suld.b.3d.b64.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i64 asm "suld.b.3d.b64.trap {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered surface: x, layer. The expected asm lists the second coordinate operand before the first.
+  // CHECK: %9 = tail call i64 asm "suld.b.a1d.b64.zero {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.zero [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i64 asm "suld.b.a1d.b64.clamp {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.clamp [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i64 asm "suld.b.a1d.b64.trap {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.trap [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered surface: x, y, layer. The expected asm lists the third coordinate operand first, then the first and second, with the second repeated as the fourth element.
+  // CHECK: %12 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap surface: x, y, face. The expectations use the same .a2d asm form as the 2D layered group.
+  // CHECK: %15 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap surface: x, y, layerface. The expectations again use the .a2d asm form.
+  // CHECK: %18 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surflonglong1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  longlong1 val;  // passed by address to every handler call below; the expectations use the 64-bit form (.b64, i64 result, "l" constraint for the value)
+  // Issues each __isurf* read and write handler once per boundary mode (Zero, Clamp, Trap). The directive above each call is the inline asm expected for it; read results are named %0..%20 in call order, so the statement order must not change. First group: 1D surface (x).
+  // CHECK: %0 = tail call i64 asm "suld.b.1d.b64.zero {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.zero [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i64 asm "suld.b.1d.b64.clamp {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.clamp [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i64 asm "suld.b.1d.b64.trap {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.trap [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D surface: coordinates x, y.
+  // CHECK: %3 = tail call i64 asm "suld.b.2d.b64.zero {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.zero [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i64 asm "suld.b.2d.b64.clamp {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.clamp [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i64 asm "suld.b.2d.b64.trap {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.trap [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D surface: x, y, z. The expected asm uses a four-element coordinate vector whose last element repeats the third coordinate operand.
+  // CHECK: %6 = tail call i64 asm "suld.b.3d.b64.zero {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i64 asm "suld.b.3d.b64.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i64 asm "suld.b.3d.b64.trap {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered surface: x, layer. The expected asm lists the second coordinate operand before the first.
+  // CHECK: %9 = tail call i64 asm "suld.b.a1d.b64.zero {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.zero [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i64 asm "suld.b.a1d.b64.clamp {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.clamp [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i64 asm "suld.b.a1d.b64.trap {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.trap [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered surface: x, y, layer. The expected asm lists the third coordinate operand first, then the first and second, with the second repeated as the fourth element.
+  // CHECK: %12 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap surface: x, y, face. The expectations use the same .a2d asm form as the 2D layered group.
+  // CHECK: %15 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap surface: x, y, layerface. The expectations again use the .a2d asm form.
+  // CHECK: %18 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfunsignedlonglong(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  unsigned long long val; // 64-bit element: every expectation below is a .b64 access with an "l" data operand.
+  // 1D: a read and a write per boundary mode (zero, clamp, trap); the comment above each call holds its expected asm.
+  // CHECK: %0 = tail call i64 asm "suld.b.1d.b64.zero {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.zero [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i64 asm "suld.b.1d.b64.clamp {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.clamp [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i64 asm "suld.b.1d.b64.trap {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.trap [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: same read/write per boundary mode, called with (x, y).
+  // CHECK: %3 = tail call i64 asm "suld.b.2d.b64.zero {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.zero [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i64 asm "suld.b.2d.b64.clamp {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.clamp [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i64 asm "suld.b.2d.b64.trap {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.trap [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: called with (x, y, z); the expected coordinate vector has four slots, the fourth repeating the last operand.
+  // CHECK: %6 = tail call i64 asm "suld.b.3d.b64.zero {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i64 asm "suld.b.3d.b64.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i64 asm "suld.b.3d.b64.trap {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (.a1d): expected coordinates are in swapped operand order, presumably layer before x (confirm against the header).
+  // CHECK: %9 = tail call i64 asm "suld.b.a1d.b64.zero {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.zero [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i64 asm "suld.b.a1d.b64.clamp {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.clamp [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i64 asm "suld.b.a1d.b64.trap {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.trap [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (.a2d): four-slot vector led by the last operand (presumably the layer); the final slot repeats the one before it.
+  // CHECK: %12 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: called with (x, y, face); the expected asm is the same .a2d form as the 2D layered group.
+  // CHECK: %15 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered: called with (x, y, layerface); again the expected asm is the .a2d form.
+  // CHECK: %18 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfulonglong1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  ulonglong1 val; // ulonglong1 element: every expectation below is a .b64 access with an "l" data operand.
+  // 1D: a read and a write per boundary mode (zero, clamp, trap); the comment above each call holds its expected asm.
+  // CHECK: %0 = tail call i64 asm "suld.b.1d.b64.zero {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.zero [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call i64 asm "suld.b.1d.b64.clamp {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.clamp [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call i64 asm "suld.b.1d.b64.trap {$0}, [$1, {$2}];", "=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b64.trap [$0, {$1}], {$2};", "l,r,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: same read/write per boundary mode, called with (x, y).
+  // CHECK: %3 = tail call i64 asm "suld.b.2d.b64.zero {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.zero [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call i64 asm "suld.b.2d.b64.clamp {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.clamp [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call i64 asm "suld.b.2d.b64.trap {$0}, [$1, {$2, $3}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b64.trap [$0, {$1, $2}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: called with (x, y, z); the expected coordinate vector has four slots, the fourth repeating the last operand.
+  // CHECK: %6 = tail call i64 asm "suld.b.3d.b64.zero {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call i64 asm "suld.b.3d.b64.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call i64 asm "suld.b.3d.b64.trap {$0}, [$1, {$2, $3, $4, $4}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b64.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (.a1d): expected coordinates are in swapped operand order, presumably layer before x (confirm against the header).
+  // CHECK: %9 = tail call i64 asm "suld.b.a1d.b64.zero {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.zero [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call i64 asm "suld.b.a1d.b64.clamp {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.clamp [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call i64 asm "suld.b.a1d.b64.trap {$0}, [$1, {$3, $2}];", "=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b64.trap [$0, {$2, $1}], {$3};", "l,r,r,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (.a2d): four-slot vector led by the last operand (presumably the layer); the final slot repeats the one before it.
+  // CHECK: %12 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: called with (x, y, face); the expected asm is the same .a2d form as the 2D layered group.
+  // CHECK: %15 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered: called with (x, y, layerface); again the expected asm is the .a2d form.
+  // CHECK: %18 = tail call i64 asm "suld.b.a2d.b64.zero {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call i64 asm "suld.b.a2d.b64.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call i64 asm "suld.b.a2d.b64.trap {$0}, [$1, {$4, $2, $3, $3}];", "=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b64.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surffloat(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  float val; // float element: every expectation below is a .b32 access with an "r" data operand; reads yield "contract float".
+  // 1D: a read and a write per boundary mode (zero, clamp, trap); the comment above each call holds its expected asm.
+  // CHECK: %0 = tail call contract float asm "suld.b.1d.b32.zero {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.zero [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call contract float asm "suld.b.1d.b32.clamp {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.clamp [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call contract float asm "suld.b.1d.b32.trap {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.trap [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: same read/write per boundary mode, called with (x, y).
+  // CHECK: %3 = tail call contract float asm "suld.b.2d.b32.zero {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.zero [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call contract float asm "suld.b.2d.b32.clamp {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.clamp [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call contract float asm "suld.b.2d.b32.trap {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.trap [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: called with (x, y, z); the expected coordinate vector has four slots, the fourth repeating the last operand.
+  // CHECK: %6 = tail call contract float asm "suld.b.3d.b32.zero {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call contract float asm "suld.b.3d.b32.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call contract float asm "suld.b.3d.b32.trap {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (.a1d): expected coordinates are in swapped operand order, presumably layer before x (confirm against the header).
+  // CHECK: %9 = tail call contract float asm "suld.b.a1d.b32.zero {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.zero [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call contract float asm "suld.b.a1d.b32.clamp {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.clamp [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call contract float asm "suld.b.a1d.b32.trap {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.trap [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (.a2d): four-slot vector led by the last operand (presumably the layer); the final slot repeats the one before it.
+  // CHECK: %12 = tail call contract float asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call contract float asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call contract float asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: called with (x, y, face); the expected asm is the same .a2d form as the 2D layered group.
+  // CHECK: %15 = tail call contract float asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call contract float asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call contract float asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered: called with (x, y, layerface); again the expected asm is the .a2d form.
+  // CHECK: %18 = tail call contract float asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call contract float asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call contract float asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surffloat1(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  float1 val;
+
+  // CHECK: %0 = tail call contract float asm "suld.b.1d.b32.zero {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.zero [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call contract float asm "suld.b.1d.b32.clamp {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.clamp [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call contract float asm "suld.b.1d.b32.trap {$0}, [$1, {$2}];", "=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.b32.trap [$0, {$1}], {$2};", "l,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // CHECK: %3 = tail call contract float asm "suld.b.2d.b32.zero {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.zero [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call contract float asm "suld.b.2d.b32.clamp {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.clamp [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call contract float asm "suld.b.2d.b32.trap {$0}, [$1, {$2, $3}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.b32.trap [$0, {$1, $2}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // CHECK: %6 = tail call contract float asm "suld.b.3d.b32.zero {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.zero [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call contract float asm "suld.b.3d.b32.clamp {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.clamp [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call contract float asm "suld.b.3d.b32.trap {$0}, [$1, {$2, $3, $4, $4}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.b32.trap [$0, {$1, $2, $3, $3}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // CHECK: %9 = tail call contract float asm "suld.b.a1d.b32.zero {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.zero [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call contract float asm "suld.b.a1d.b32.clamp {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.clamp [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call contract float asm "suld.b.a1d.b32.trap {$0}, [$1, {$3, $2}];", "=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.b32.trap [$0, {$2, $1}], {$3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %12 = tail call contract float asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call contract float asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call contract float asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %15 = tail call contract float asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call contract float asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call contract float asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // CHECK: %18 = tail call contract float asm "suld.b.a2d.b32.zero {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.zero [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call contract float asm "suld.b.a2d.b32.clamp {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.clamp [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call contract float asm "suld.b.a2d.b32.trap {$0}, [$1, {$4, $2, $3, $3}];", "=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.b32.trap [$0, {$3, $1, $2, $2}], {$4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfchar2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) { // char2 case: one read and one write per surface call name and boundary mode (zero, clamp, trap)
+  char2 val; // its address is handed to every read and write call below; never initialized here
+
+  // CHECK: %0 = tail call { i8, i8 } asm "suld.b.1d.v2.b8.zero {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero); // expected asm yields two i8 values through "=h" outputs; the %N result names count up, so the reads must keep this order
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b8.zero [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i8, i8 } asm "suld.b.1d.v2.b8.clamp {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b8.clamp [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i8, i8 } asm "suld.b.1d.v2.b8.trap {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b8.trap [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // CHECK: %3 = tail call { i8, i8 } asm "suld.b.2d.v2.b8.zero {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b8.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i8, i8 } asm "suld.b.2d.v2.b8.clamp {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b8.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i8, i8 } asm "suld.b.2d.v2.b8.trap {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b8.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // CHECK: %6 = tail call { i8, i8 } asm "suld.b.3d.v2.b8.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero); // expected asm repeats the last coordinate operand ($5) in the fourth slot of the coordinate vector
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b8.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i8, i8 } asm "suld.b.3d.v2.b8.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b8.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i8, i8 } asm "suld.b.3d.v2.b8.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b8.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // CHECK: %9 = tail call { i8, i8 } asm "suld.b.a1d.v2.b8.zero {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero); // expected asm lists operand $4 before $3 in the coordinate vector - presumably layer ahead of x; confirm against the intrinsics header
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b8.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i8, i8 } asm "suld.b.a1d.v2.b8.clamp {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b8.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i8, i8 } asm "suld.b.a1d.v2.b8.trap {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b8.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %12 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero); // expected coordinate vector starts with the last input operand ($5) - presumably the layer; confirm against the intrinsics header
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %15 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero); // expected asm is the same a2d form as the 2D layered calls, with face passed where layer was
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // CHECK: %18 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero); // again the a2d form, with layerface as the third coordinate argument
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfuchar2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) { // uchar2 case: one read and one write per surface call name and boundary mode (zero, clamp, trap)
+  uchar2 val; // its address is handed to every read and write call below; never initialized here
+
+  // CHECK: %0 = tail call { i8, i8 } asm "suld.b.1d.v2.b8.zero {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero); // expected asm uses v2.b8 elements returned as { i8, i8 }; the %N result names count up, so the reads must keep this order
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b8.zero [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i8, i8 } asm "suld.b.1d.v2.b8.clamp {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b8.clamp [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i8, i8 } asm "suld.b.1d.v2.b8.trap {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b8.trap [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // CHECK: %3 = tail call { i8, i8 } asm "suld.b.2d.v2.b8.zero {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b8.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i8, i8 } asm "suld.b.2d.v2.b8.clamp {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b8.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i8, i8 } asm "suld.b.2d.v2.b8.trap {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b8.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // CHECK: %6 = tail call { i8, i8 } asm "suld.b.3d.v2.b8.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero); // expected asm repeats the last coordinate operand ($5) in the fourth slot of the coordinate vector
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b8.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i8, i8 } asm "suld.b.3d.v2.b8.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b8.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i8, i8 } asm "suld.b.3d.v2.b8.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b8.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // CHECK: %9 = tail call { i8, i8 } asm "suld.b.a1d.v2.b8.zero {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero); // expected asm lists operand $4 before $3 in the coordinate vector - presumably layer ahead of x; confirm against the intrinsics header
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b8.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i8, i8 } asm "suld.b.a1d.v2.b8.clamp {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b8.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i8, i8 } asm "suld.b.a1d.v2.b8.trap {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b8.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %12 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero); // expected coordinate vector starts with the last input operand ($5) - presumably the layer; confirm against the intrinsics header
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %15 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero); // expected asm is the same a2d form as the 2D layered calls, with face passed where layer was
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // CHECK: %18 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero); // again the a2d form, with layerface as the third coordinate argument
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i8, i8 } asm "suld.b.a2d.v2.b8.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfshort2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) { // short2 case: one read and one write per surface call name and boundary mode (zero, clamp, trap)
+  short2 val; // its address is handed to every read and write call below; never initialized here
+
+  // CHECK: %0 = tail call { i16, i16 } asm "suld.b.1d.v2.b16.zero {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero); // expected asm uses v2.b16 elements returned as { i16, i16 }; the %N result names count up, so the reads must keep this order
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b16.zero [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i16, i16 } asm "suld.b.1d.v2.b16.clamp {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b16.clamp [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i16, i16 } asm "suld.b.1d.v2.b16.trap {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b16.trap [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // CHECK: %3 = tail call { i16, i16 } asm "suld.b.2d.v2.b16.zero {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b16.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i16, i16 } asm "suld.b.2d.v2.b16.clamp {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b16.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i16, i16 } asm "suld.b.2d.v2.b16.trap {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b16.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // CHECK: %6 = tail call { i16, i16 } asm "suld.b.3d.v2.b16.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero); // expected asm repeats the last coordinate operand ($5) in the fourth slot of the coordinate vector
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b16.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i16, i16 } asm "suld.b.3d.v2.b16.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b16.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i16, i16 } asm "suld.b.3d.v2.b16.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b16.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // CHECK: %9 = tail call { i16, i16 } asm "suld.b.a1d.v2.b16.zero {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero); // expected asm lists operand $4 before $3 in the coordinate vector - presumably layer ahead of x; confirm against the intrinsics header
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b16.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i16, i16 } asm "suld.b.a1d.v2.b16.clamp {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b16.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i16, i16 } asm "suld.b.a1d.v2.b16.trap {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b16.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %12 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero); // expected coordinate vector starts with the last input operand ($5) - presumably the layer; confirm against the intrinsics header
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %15 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero); // expected asm is the same a2d form as the 2D layered calls, with face passed where layer was
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // CHECK: %18 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero); // again the a2d form, with layerface as the third coordinate argument
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+__device__ void surfushort2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  ushort2 val;
+
+  // CHECK: %0 = tail call { i16, i16 } asm "suld.b.1d.v2.b16.zero {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b16.zero [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i16, i16 } asm "suld.b.1d.v2.b16.clamp {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b16.clamp [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i16, i16 } asm "suld.b.1d.v2.b16.trap {$0, $1}, [$2, {$3}];", "=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b16.trap [$0, {$1}], {$2, $3};", "l,r,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // CHECK: %3 = tail call { i16, i16 } asm "suld.b.2d.v2.b16.zero {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b16.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i16, i16 } asm "suld.b.2d.v2.b16.clamp {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b16.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i16, i16 } asm "suld.b.2d.v2.b16.trap {$0, $1}, [$2, {$3, $4}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b16.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // CHECK: %6 = tail call { i16, i16 } asm "suld.b.3d.v2.b16.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b16.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i16, i16 } asm "suld.b.3d.v2.b16.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b16.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i16, i16 } asm "suld.b.3d.v2.b16.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b16.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // CHECK: %9 = tail call { i16, i16 } asm "suld.b.a1d.v2.b16.zero {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b16.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i16, i16 } asm "suld.b.a1d.v2.b16.clamp {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b16.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i16, i16 } asm "suld.b.a1d.v2.b16.trap {$0, $1}, [$2, {$4, $3}];", "=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b16.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %12 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %15 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // CHECK: %18 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i16, i16 } asm "suld.b.a2d.v2.b16.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfint2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  int2 val;
+  // int2 case: each handler call is preceded by the inline asm expected for it (two i32 values, v2.b32). 1D surface, zero/clamp/trap modes:
+  // CHECK: %0 = tail call { i32, i32 } asm "suld.b.1d.v2.b32.zero {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.zero [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i32, i32 } asm "suld.b.1d.v2.b32.clamp {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.clamp [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i32, i32 } asm "suld.b.1d.v2.b32.trap {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.trap [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D surface: the expected asm carries two coordinate operands.
+  // CHECK: %3 = tail call { i32, i32 } asm "suld.b.2d.v2.b32.zero {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i32, i32 } asm "suld.b.2d.v2.b32.clamp {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i32, i32 } asm "suld.b.2d.v2.b32.trap {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D surface: the expected coordinate vector has four slots, with the last coordinate operand repeated.
+  // CHECK: %6 = tail call { i32, i32 } asm "suld.b.3d.v2.b32.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i32, i32 } asm "suld.b.3d.v2.b32.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i32, i32 } asm "suld.b.3d.v2.b32.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered surface: expected a1d form, with the two coordinate operands listed in reverse order.
+  // CHECK: %9 = tail call { i32, i32 } asm "suld.b.a1d.v2.b32.zero {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i32, i32 } asm "suld.b.a1d.v2.b32.clamp {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i32, i32 } asm "suld.b.a1d.v2.b32.trap {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered surface: expected a2d form; the last coordinate operand is listed first and the one before it is repeated.
+  // CHECK: %12 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap surface: the expected asm is the same a2d form as for the 2D layered calls; face is passed as the third coordinate.
+  // CHECK: %15 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap surface: again the same a2d form is expected; layerface is passed as the third coordinate.
+  // CHECK: %18 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfuint2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  uint2 val;
+  // uint2 case: each handler call is preceded by the inline asm expected for it (two i32 values, v2.b32). 1D surface, zero/clamp/trap modes:
+  // CHECK: %0 = tail call { i32, i32 } asm "suld.b.1d.v2.b32.zero {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.zero [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i32, i32 } asm "suld.b.1d.v2.b32.clamp {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.clamp [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i32, i32 } asm "suld.b.1d.v2.b32.trap {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.trap [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D surface: the expected asm carries two coordinate operands.
+  // CHECK: %3 = tail call { i32, i32 } asm "suld.b.2d.v2.b32.zero {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i32, i32 } asm "suld.b.2d.v2.b32.clamp {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i32, i32 } asm "suld.b.2d.v2.b32.trap {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D surface: the expected coordinate vector has four slots, with the last coordinate operand repeated.
+  // CHECK: %6 = tail call { i32, i32 } asm "suld.b.3d.v2.b32.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i32, i32 } asm "suld.b.3d.v2.b32.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i32, i32 } asm "suld.b.3d.v2.b32.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered surface: expected a1d form, with the two coordinate operands listed in reverse order.
+  // CHECK: %9 = tail call { i32, i32 } asm "suld.b.a1d.v2.b32.zero {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i32, i32 } asm "suld.b.a1d.v2.b32.clamp {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i32, i32 } asm "suld.b.a1d.v2.b32.trap {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered surface: expected a2d form; the last coordinate operand is listed first and the one before it is repeated.
+  // CHECK: %12 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap surface: the expected asm is the same a2d form as for the 2D layered calls; face is passed as the third coordinate.
+  // CHECK: %15 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap surface: again the same a2d form is expected; layerface is passed as the third coordinate.
+  // CHECK: %18 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i32, i32 } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surflonglong2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  longlong2 val;
+  // longlong2 case: each handler call is preceded by the inline asm expected for it (two i64 values, v2.b64, "l" value operands). 1D surface, zero/clamp/trap modes:
+  // CHECK: %0 = tail call { i64, i64 } asm "suld.b.1d.v2.b64.zero {$0, $1}, [$2, {$3}];", "=l,=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b64.zero [$0, {$1}], {$2, $3};", "l,r,l,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i64, i64 } asm "suld.b.1d.v2.b64.clamp {$0, $1}, [$2, {$3}];", "=l,=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b64.clamp [$0, {$1}], {$2, $3};", "l,r,l,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i64, i64 } asm "suld.b.1d.v2.b64.trap {$0, $1}, [$2, {$3}];", "=l,=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b64.trap [$0, {$1}], {$2, $3};", "l,r,l,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D surface: the expected asm carries two coordinate operands.
+  // CHECK: %3 = tail call { i64, i64 } asm "suld.b.2d.v2.b64.zero {$0, $1}, [$2, {$3, $4}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b64.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i64, i64 } asm "suld.b.2d.v2.b64.clamp {$0, $1}, [$2, {$3, $4}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b64.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i64, i64 } asm "suld.b.2d.v2.b64.trap {$0, $1}, [$2, {$3, $4}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b64.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D surface: the expected coordinate vector has four slots, with the last coordinate operand repeated.
+  // CHECK: %6 = tail call { i64, i64 } asm "suld.b.3d.v2.b64.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b64.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i64, i64 } asm "suld.b.3d.v2.b64.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b64.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i64, i64 } asm "suld.b.3d.v2.b64.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b64.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered surface: expected a1d form, with the two coordinate operands listed in reverse order.
+  // CHECK: %9 = tail call { i64, i64 } asm "suld.b.a1d.v2.b64.zero {$0, $1}, [$2, {$4, $3}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b64.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i64, i64 } asm "suld.b.a1d.v2.b64.clamp {$0, $1}, [$2, {$4, $3}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b64.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i64, i64 } asm "suld.b.a1d.v2.b64.trap {$0, $1}, [$2, {$4, $3}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b64.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered surface: expected a2d form; the last coordinate operand is listed first and the one before it is repeated.
+  // CHECK: %12 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap surface: the expected asm is the same a2d form as for the 2D layered calls; face is passed as the third coordinate.
+  // CHECK: %15 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap surface: again the same a2d form is expected; layerface is passed as the third coordinate.
+  // CHECK: %18 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfulonglong2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) { // Pins the inline asm expected for ulonglong2 surface reads/writes.
+  ulonglong2 val; // Passed by address to every handler call below.
+  // 1D, for each boundary mode (zero/clamp/trap): read, then write. Expected asm is v2.b64 with "l" data operands.
+  // CHECK: %0 = tail call { i64, i64 } asm "suld.b.1d.v2.b64.zero {$0, $1}, [$2, {$3}];", "=l,=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b64.zero [$0, {$1}], {$2, $3};", "l,r,l,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i64, i64 } asm "suld.b.1d.v2.b64.clamp {$0, $1}, [$2, {$3}];", "=l,=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b64.clamp [$0, {$1}], {$2, $3};", "l,r,l,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i64, i64 } asm "suld.b.1d.v2.b64.trap {$0, $1}, [$2, {$3}];", "=l,=l,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b64.trap [$0, {$1}], {$2, $3};", "l,r,l,l"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: the same read/write pairs, addressed with x and y.
+  // CHECK: %3 = tail call { i64, i64 } asm "suld.b.2d.v2.b64.zero {$0, $1}, [$2, {$3, $4}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b64.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i64, i64 } asm "suld.b.2d.v2.b64.clamp {$0, $1}, [$2, {$3, $4}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b64.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i64, i64 } asm "suld.b.2d.v2.b64.trap {$0, $1}, [$2, {$3, $4}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b64.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: the expected asm repeats the last coordinate operand to fill a four-element list.
+  // CHECK: %6 = tail call { i64, i64 } asm "suld.b.3d.v2.b64.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b64.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i64, i64 } asm "suld.b.3d.v2.b64.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b64.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i64, i64 } asm "suld.b.3d.v2.b64.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b64.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered: the expected a1d asm lists the two coordinate operands in swapped order.
+  // CHECK: %9 = tail call { i64, i64 } asm "suld.b.a1d.v2.b64.zero {$0, $1}, [$2, {$4, $3}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b64.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i64, i64 } asm "suld.b.a1d.v2.b64.clamp {$0, $1}, [$2, {$4, $3}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b64.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i64, i64 } asm "suld.b.a1d.v2.b64.trap {$0, $1}, [$2, {$4, $3}];", "=l,=l,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b64.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,l,l"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered: expected asm is the a2d form; the highest-numbered coordinate operand is listed first.
+  // CHECK: %12 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: the expected asm is the same a2d form as in the 2D layered group; face is the third index argument.
+  // CHECK: %15 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap: again the same a2d form, with layerface as the third index argument.
+  // CHECK: %18 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i64, i64 } asm "suld.b.a2d.v2.b64.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=l,=l,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b64.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,l,l"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surffloat2(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) { // Pins the inline asm expected for float2 surface reads/writes.
+  float2 val; // Passed by address to every handler call below.
+  // 1D, for each boundary mode (zero/clamp/trap): read, then write. Expected asm is v2.b32 with "r" data operands; reads yield { float, float }.
+  // CHECK: %0 = tail call contract { float, float } asm "suld.b.1d.v2.b32.zero {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.zero [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call contract { float, float } asm "suld.b.1d.v2.b32.clamp {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.clamp [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call contract { float, float } asm "suld.b.1d.v2.b32.trap {$0, $1}, [$2, {$3}];", "=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v2.b32.trap [$0, {$1}], {$2, $3};", "l,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: the same read/write pairs, addressed with x and y.
+  // CHECK: %3 = tail call contract { float, float } asm "suld.b.2d.v2.b32.zero {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.zero [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call contract { float, float } asm "suld.b.2d.v2.b32.clamp {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.clamp [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call contract { float, float } asm "suld.b.2d.v2.b32.trap {$0, $1}, [$2, {$3, $4}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v2.b32.trap [$0, {$1, $2}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: the expected asm repeats the last coordinate operand to fill a four-element list.
+  // CHECK: %6 = tail call contract { float, float } asm "suld.b.3d.v2.b32.zero {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.zero [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call contract { float, float } asm "suld.b.3d.v2.b32.clamp {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.clamp [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call contract { float, float } asm "suld.b.3d.v2.b32.trap {$0, $1}, [$2, {$3, $4, $5, $5}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v2.b32.trap [$0, {$1, $2, $3, $3}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered: the expected a1d asm lists the two coordinate operands in swapped order.
+  // CHECK: %9 = tail call contract { float, float } asm "suld.b.a1d.v2.b32.zero {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.zero [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call contract { float, float } asm "suld.b.a1d.v2.b32.clamp {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.clamp [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call contract { float, float } asm "suld.b.a1d.v2.b32.trap {$0, $1}, [$2, {$4, $3}];", "=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v2.b32.trap [$0, {$2, $1}], {$3, $4};", "l,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered: expected asm is the a2d form; the highest-numbered coordinate operand is listed first.
+  // CHECK: %12 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: the expected asm is the same a2d form as in the 2D layered group; face is the third index argument.
+  // CHECK: %15 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap: again the same a2d form, with layerface as the third index argument.
+  // CHECK: %18 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.zero {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.clamp {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call contract { float, float } asm "suld.b.a2d.v2.b32.trap {$0, $1}, [$2, {$5, $3, $4, $4}];", "=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v2.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfchar4(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) { // Pins the inline asm expected for char4 surface reads/writes.
+  char4 val; // Passed by address to every handler call below.
+  // 1D, for each boundary mode (zero/clamp/trap): read, then write. Expected asm is v4.b8 with "h" data operands; reads yield four i8 values.
+  // CHECK: %0 = tail call { i8, i8, i8, i8 } asm "suld.b.1d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b8.zero [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i8, i8, i8, i8 } asm "suld.b.1d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b8.clamp [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i8, i8, i8, i8 } asm "suld.b.1d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b8.trap [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: the same read/write pairs, addressed with x and y.
+  // CHECK: %3 = tail call { i8, i8, i8, i8 } asm "suld.b.2d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b8.zero [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i8, i8, i8, i8 } asm "suld.b.2d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b8.clamp [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i8, i8, i8, i8 } asm "suld.b.2d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b8.trap [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: the expected asm repeats the last coordinate operand to fill a four-element list.
+  // CHECK: %6 = tail call { i8, i8, i8, i8 } asm "suld.b.3d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b8.zero [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i8, i8, i8, i8 } asm "suld.b.3d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b8.clamp [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i8, i8, i8, i8 } asm "suld.b.3d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b8.trap [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered: the expected a1d asm lists the two coordinate operands in swapped order.
+  // CHECK: %9 = tail call { i8, i8, i8, i8 } asm "suld.b.a1d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b8.zero [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i8, i8, i8, i8 } asm "suld.b.a1d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b8.clamp [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i8, i8, i8, i8 } asm "suld.b.a1d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b8.trap [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered: expected asm is the a2d form; the highest-numbered coordinate operand is listed first.
+  // CHECK: %12 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: the expected asm is the same a2d form as in the 2D layered group; face is the third index argument.
+  // CHECK: %15 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Layered cubemap: again the same a2d form, with layerface as the third index argument.
+  // CHECK: %18 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfuchar4(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  uchar4 val;
+
+  // CHECK: %0 = tail call { i8, i8, i8, i8 } asm "suld.b.1d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b8.zero [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i8, i8, i8, i8 } asm "suld.b.1d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b8.clamp [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i8, i8, i8, i8 } asm "suld.b.1d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b8.trap [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // CHECK: %3 = tail call { i8, i8, i8, i8 } asm "suld.b.2d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b8.zero [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i8, i8, i8, i8 } asm "suld.b.2d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b8.clamp [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i8, i8, i8, i8 } asm "suld.b.2d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b8.trap [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // CHECK: %6 = tail call { i8, i8, i8, i8 } asm "suld.b.3d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b8.zero [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i8, i8, i8, i8 } asm "suld.b.3d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b8.clamp [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i8, i8, i8, i8 } asm "suld.b.3d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b8.trap [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // CHECK: %9 = tail call { i8, i8, i8, i8 } asm "suld.b.a1d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b8.zero [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i8, i8, i8, i8 } asm "suld.b.a1d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b8.clamp [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i8, i8, i8, i8 } asm "suld.b.a1d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b8.trap [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %12 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %15 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // CHECK: %18 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i8, i8, i8, i8 } asm "suld.b.a2d.v4.b8.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b8.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+// Exercises the surface handler with a short4 payload. For every surface
+// geometry (1D, 2D, 3D, 1DLayered, 2DLayered, Cubemap, CubemapLayered) and
+// every boundary mode (Zero, Clamp, Trap) it issues one read followed by one
+// "_v2" write through __nv_tex_surf_handler. Each call is preceded by the
+// FileCheck directive holding the inline asm expected for it: suld.b.* for
+// reads and sust.b.* for writes, all with the v4.b16 suffix.
+//
+// The expected read results are named %0 through %20 in call order, so the
+// directives are tied to the exact sequence of calls below; adding, removing
+// or reordering a call presumably requires renumbering them (TODO confirm).
+__device__ void surfshort4(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  short4 val;
+
+  // 1D: called with (x); the expected asm has a single index operand.
+  // CHECK: %0 = tail call { i16, i16, i16, i16 } asm "suld.b.1d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b16.zero [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i16, i16, i16, i16 } asm "suld.b.1d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b16.clamp [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i16, i16, i16, i16 } asm "suld.b.1d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b16.trap [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // 2D: called with (x, y); the expected asm has two index operands in order.
+  // CHECK: %3 = tail call { i16, i16, i16, i16 } asm "suld.b.2d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b16.zero [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i16, i16, i16, i16 } asm "suld.b.2d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b16.clamp [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i16, i16, i16, i16 } asm "suld.b.2d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b16.trap [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // 3D: called with (x, y, z); the expected index vector has four slots and
+  // repeats the last index operand in the fourth one.
+  // CHECK: %6 = tail call { i16, i16, i16, i16 } asm "suld.b.3d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b16.zero [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i16, i16, i16, i16 } asm "suld.b.3d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b16.clamp [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i16, i16, i16, i16 } asm "suld.b.3d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b16.trap [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // 1D layered: called with (x, layer); the expected index vector lists the
+  // second index operand before the first.
+  // CHECK: %9 = tail call { i16, i16, i16, i16 } asm "suld.b.a1d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b16.zero [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i16, i16, i16, i16 } asm "suld.b.a1d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b16.clamp [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i16, i16, i16, i16 } asm "suld.b.a1d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b16.trap [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // 2D layered: called with (x, y, layer); the expected index vector lists
+  // the third index operand first, then the first, then the second twice.
+  // CHECK: %12 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // Cubemap: called with (x, y, face); the expected asm is the same a2d form
+  // as in the 2D layered group.
+  // CHECK: %15 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // Cubemap layered: called with (x, y, layerface); again the same a2d form.
+  // CHECK: %18 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+// Exercises the surface handler with a ushort4 payload. For every surface
+// geometry (1D, 2D, 3D, 1DLayered, 2DLayered, Cubemap, CubemapLayered) and
+// every boundary mode (Zero, Clamp, Trap) it issues one read followed by one
+// "_v2" write through __nv_tex_surf_handler. Each call is preceded by the
+// FileCheck directive holding the inline asm expected for it: suld.b.* for
+// reads and sust.b.* for writes, all with the v4.b16 suffix and i16 results,
+// i.e. the unsigned payload expects the same 16-bit forms.
+//
+// The expected read results are named %0 through %20 in call order, so the
+// directives are tied to the exact sequence of calls below; adding, removing
+// or reordering a call presumably requires renumbering them (TODO confirm).
+__device__ void surfushort4(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  ushort4 val;
+
+  // 1D: called with (x); the expected asm has a single index operand.
+  // CHECK: %0 = tail call { i16, i16, i16, i16 } asm "suld.b.1d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b16.zero [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i16, i16, i16, i16 } asm "suld.b.1d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b16.clamp [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i16, i16, i16, i16 } asm "suld.b.1d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$5}];", "=h,=h,=h,=h,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b16.trap [$0, {$1}], {$2, $3, $4, $5};", "l,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // 2D: called with (x, y); the expected asm has two index operands in order.
+  // CHECK: %3 = tail call { i16, i16, i16, i16 } asm "suld.b.2d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b16.zero [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i16, i16, i16, i16 } asm "suld.b.2d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b16.clamp [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i16, i16, i16, i16 } asm "suld.b.2d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$5, $6}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b16.trap [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // 3D: called with (x, y, z); the expected index vector has four slots and
+  // repeats the last index operand in the fourth one.
+  // CHECK: %6 = tail call { i16, i16, i16, i16 } asm "suld.b.3d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b16.zero [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i16, i16, i16, i16 } asm "suld.b.3d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b16.clamp [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i16, i16, i16, i16 } asm "suld.b.3d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b16.trap [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // 1D layered: called with (x, layer); the expected index vector lists the
+  // second index operand before the first.
+  // CHECK: %9 = tail call { i16, i16, i16, i16 } asm "suld.b.a1d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b16.zero [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i16, i16, i16, i16 } asm "suld.b.a1d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b16.clamp [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i16, i16, i16, i16 } asm "suld.b.a1d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$6, $5}];", "=h,=h,=h,=h,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b16.trap [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // 2D layered: called with (x, y, layer); the expected index vector lists
+  // the third index operand first, then the first, then the second twice.
+  // CHECK: %12 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // Cubemap: called with (x, y, face); the expected asm is the same a2d form
+  // as in the 2D layered group.
+  // CHECK: %15 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // Cubemap layered: called with (x, y, layerface); again the same a2d form.
+  // CHECK: %18 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i16, i16, i16, i16 } asm "suld.b.a2d.v4.b16.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=h,=h,=h,=h,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b16.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,h,h,h,h"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfint4(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  int4 val;
+
+  // CHECK: %0 = tail call { i32, i32, i32, i32 } asm "suld.b.1d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.zero [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %1 = tail call { i32, i32, i32, i32 } asm "suld.b.1d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.clamp [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %2 = tail call { i32, i32, i32, i32 } asm "suld.b.1d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.trap [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+
+  // CHECK: %3 = tail call { i32, i32, i32, i32 } asm "suld.b.2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.zero [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %4 = tail call { i32, i32, i32, i32 } asm "suld.b.2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.clamp [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %5 = tail call { i32, i32, i32, i32 } asm "suld.b.2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.trap [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+
+  // CHECK: %6 = tail call { i32, i32, i32, i32 } asm "suld.b.3d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.zero [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %7 = tail call { i32, i32, i32, i32 } asm "suld.b.3d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.clamp [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %8 = tail call { i32, i32, i32, i32 } asm "suld.b.3d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.trap [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+
+  // CHECK: %9 = tail call { i32, i32, i32, i32 } asm "suld.b.a1d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.zero [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %10 = tail call { i32, i32, i32, i32 } asm "suld.b.a1d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.clamp [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %11 = tail call { i32, i32, i32, i32 } asm "suld.b.a1d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.trap [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %12 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %13 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %14 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+
+  // CHECK: %15 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %16 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %17 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+
+  // CHECK: %18 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %19 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %20 = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surfuint4(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  uint4 val; // Passed by address to every handler call below.
+  // uint4 payload: every access is expected to lower to a .v4.b32 suld/sust. Read results are matched as %{{.*}} so the test does not depend on LLVM value numbering.
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.1d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.zero [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.1d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.clamp [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.1d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.trap [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: the same read/write pair per boundary mode, indexed by (x, y).
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.zero [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.clamp [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.trap [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: the expected coordinate vector has four slots; the last index operand is repeated to fill it.
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.3d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.zero [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.3d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.clamp [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.3d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.trap [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (a1d): the second index operand is listed first in the expected coordinate vector.
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a1d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.zero [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a1d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.clamp [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a1d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.trap [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (a2d): the third index operand leads the expected coordinate vector; the last slot repeats the second.
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: the expected asm is the same a2d form as the 2D layered case, with face as the third index argument.
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered: again the a2d form, with layerface as the third index argument.
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call { i32, i32, i32, i32 } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}
+
+__device__ void surffloat4(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) {
+  float4 val; // Passed by address to every handler call below.
+  // float4 payload: every access is expected to lower to a .v4.b32 suld/sust. Read results are matched as %{{.*}} so the test does not depend on LLVM value numbering.
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.1d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.zero [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.1d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.clamp [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.1d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5}];", "=r,=r,=r,=r,l,r"
+  __nv_tex_surf_handler("__isurf1Dread", &val, surf, x, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.1d.v4.b32.trap [$0, {$1}], {$2, $3, $4, $5};", "l,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1Dwrite_v2", &val, surf, x, cudaBoundaryModeTrap);
+  // 2D: the same read/write pair per boundary mode, indexed by (x, y).
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.zero [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.clamp [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5, $6}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf2Dread", &val, surf, x, y, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.2d.v4.b32.trap [$0, {$1, $2}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2Dwrite_v2", &val, surf, x, y, cudaBoundaryModeTrap);
+  // 3D: the expected coordinate vector has four slots; the last index operand is repeated to fill it.
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.3d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.zero [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.3d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.clamp [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.3d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$5, $6, $7, $7}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dread", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.3d.v4.b32.trap [$0, {$1, $2, $3, $3}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf3Dwrite_v2", &val, surf, x, y, z, cudaBoundaryModeTrap);
+  // 1D layered (a1d): the second index operand is listed first in the expected coordinate vector.
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a1d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.zero [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a1d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.clamp [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a1d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$6, $5}];", "=r,=r,=r,=r,l,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredread", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a1d.v4.b32.trap [$0, {$2, $1}], {$3, $4, $5, $6};", "l,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf1DLayeredwrite_v2", &val, surf, x, layer, cudaBoundaryModeTrap);
+  // 2D layered (a2d): the third index operand leads the expected coordinate vector; the last slot repeats the second.
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredread", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurf2DLayeredwrite_v2", &val, surf, x, y, layer, cudaBoundaryModeTrap);
+  // Cubemap: the expected asm is the same a2d form as the 2D layered case, with face as the third index argument.
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapread", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapwrite_v2", &val, surf, x, y, face, cudaBoundaryModeTrap);
+  // Cubemap layered: again the a2d form, with layerface as the third index argument.
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.zero {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.zero [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeZero);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.clamp {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.clamp [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeClamp);
+  // CHECK: %{{.*}} = tail call contract { float, float, float, float } asm "suld.b.a2d.v4.b32.trap {$0, $1, $2, $3}, [$4, {$7, $5, $6, $6}];", "=r,=r,=r,=r,l,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredread", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+  // CHECK: tail call void asm sideeffect "sust.b.a2d.v4.b32.trap [$0, {$3, $1, $2, $2}], {$4, $5, $6, $7};", "l,r,r,r,r,r,r,r"
+  __nv_tex_surf_handler("__isurfCubemapLayeredwrite_v2", &val, surf, x, y, layerface, cudaBoundaryModeTrap);
+}

>From d8923194c57cc03fde0e70026712c19ca00a7a60 Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Sat, 29 Mar 2025 19:27:04 -0700
Subject: [PATCH 6/7] Clang format

---
 .../Headers/__clang_cuda_runtime_wrapper.h    |   2 +-
 .../Headers/__clang_cuda_texture_intrinsics.h | 410 +++++++++---------
 2 files changed, 215 insertions(+), 197 deletions(-)

diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index 8182c961ec32f..44934ba2c2d67 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -384,9 +384,9 @@ __host__ __device__ void __nv_tex_surf_handler(const char *name, T *ptr,
 // will continue to fail as it does now.
 #endif // CUDA_VERSION
 #endif // __cplusplus >= 201103L && CUDA_VERSION >= 9000
+#include "surface_indirect_functions.h"
 #include "texture_fetch_functions.h"
 #include "texture_indirect_functions.h"
-#include "surface_indirect_functions.h"
 
 // Restore state of __CUDA_ARCH__ and __THROW we had on entry.
 #pragma pop_macro("__CUDA_ARCH__")
diff --git a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
index 618ac70eefe99..db682f0df43a1 100644
--- a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
@@ -104,7 +104,6 @@
 #pragma push_macro("__3DV2");
 #pragma push_macro("__3DV4");
 
-
 // Put all functions into anonymous namespace so they have internal linkage.
 // The device-only function here must be internal in order to avoid ODR
 // violations in case they are used from the files compiled with
@@ -250,14 +249,13 @@ struct __texture_op_tag {};
 struct __surface_op_tag {};
 
 // Template specialization to determine operation type based on tag value
-template <class __op>
-struct __op_type_traits {
+template <class __op> struct __op_type_traits {
   using type = __texture_op_tag;
 };
 
 // Specialize for known surface operation tags
-#define __OP_TYPE_SURFACE(__op) \
-    template <> struct __op_type_traits<__op> { using type = __surface_op_tag; };
+#define __OP_TYPE_SURFACE(__op)                                                \
+  template <> struct __op_type_traits<__op> { using type = __surface_op_tag; };
 
 // Classes that implement specific texture ops.
 template <class __op> struct __tex_fetch_v4;
@@ -722,10 +720,11 @@ template <class __DestT, class __SrcT> struct __convert {
   }
 };
 
-// There are a couple of layers here.  First, __op_type_traits is used to dispatch to either surface write calls, or to
-// the texture read calls.
+// There are a couple of layers here.  First, __op_type_traits is used to
+// dispatch to either surface write calls, or to the texture read calls.
 //
-// Then, that dispatches to __tex_fetch_impl below, which dispatches by both tag and datatype to the appropriate
+// Then, that dispatches to __tex_fetch_impl below, which dispatches by both tag
+// and datatype to the appropriate
 // __surf_read_write_v2.
 // TODO(austin): Do the reads too.
 
@@ -745,95 +744,105 @@ __OP_TYPE_SURFACE(__ID("__isurf2DLayeredwrite_v2"));
 __OP_TYPE_SURFACE(__ID("__isurfCubemapwrite_v2"));
 __OP_TYPE_SURFACE(__ID("__isurfCubemapLayeredwrite_v2"));
 
-template <class __op, typename __type>
-struct __surf_read_write_v2;
-
-// For the various write calls, we need to be able to generate variations with different IDs, different numbers of
-// arguments, and different numbers of outputs.
-
-#define __SURF_WRITE_V2(__op, __asm_dim, __asmtype, __type, __index_op_args, __index_args, __index_asm_args,          \
-                        __asm_op_args, __asm_args)                                                                    \
-    template <>                                                                                                       \
-    struct __surf_read_write_v2<__op, __type> {                                                                       \
-        static __device__ void __run(__type *__ptr, cudaSurfaceObject_t obj, __L(__index_args),                       \
-                                     cudaSurfaceBoundaryMode mode) {                                                  \
-            switch (mode) {                                                                                           \
-                case cudaBoundaryModeZero:                                                                            \
-                    asm volatile("sust.b." __asm_dim "." __asmtype ".zero [%0, " __index_op_args "], " __asm_op_args  \
-                                 ";"                                                                                  \
-                                 :                                                                                    \
-                                 : "l"(obj), __L(__index_asm_args), __L(__asm_args));                                 \
-                    break;                                                                                            \
-                case cudaBoundaryModeClamp:                                                                           \
-                    asm volatile("sust.b." __asm_dim "." __asmtype ".clamp [%0, " __index_op_args "], " __asm_op_args \
-                                 ";"                                                                                  \
-                                 :                                                                                    \
-                                 : "l"(obj), __L(__index_asm_args), __L(__asm_args));                                 \
-                    break;                                                                                            \
-                case cudaBoundaryModeTrap:                                                                            \
-                    asm volatile("sust.b." __asm_dim "." __asmtype ".trap [%0, " __index_op_args "], " __asm_op_args  \
-                                 ";"                                                                                  \
-                                 :                                                                                    \
-                                 : "l"(obj), __L(__index_asm_args), __L(__asm_args));                                 \
-                    break;                                                                                            \
-            }                                                                                                         \
-        }                                                                                                             \
-    }
+template <class __op, typename __type> struct __surf_read_write_v2;
+
+// __SURF_WRITE_V2 specializes __surf_read_write_v2<__op, __type> with a static
+// __run() that switches on the boundary mode and emits the matching
+// "sust.b.<dim>.<type>.{zero,clamp,trap}" asm; obj is asm operand %0 ("l").
+// Parameters vary the op ID, the index arguments and the operands stored.
+#define __SURF_WRITE_V2(__op, __asm_dim, __asmtype, __type, __index_op_args,   \
+                        __index_args, __index_asm_args, __asm_op_args,         \
+                        __asm_args)                                            \
+  template <> struct __surf_read_write_v2<__op, __type> {                      \
+    static __device__ void __run(__type *__ptr, cudaSurfaceObject_t obj,       \
+                                 __L(__index_args),                            \
+                                 cudaSurfaceBoundaryMode mode) {               \
+      switch (mode) {                                                          \
+      case cudaBoundaryModeZero:                                               \
+        asm volatile("sust.b." __asm_dim "." __asmtype                         \
+                     ".zero [%0, " __index_op_args "], " __asm_op_args ";"     \
+                     :                                                         \
+                     : "l"(obj), __L(__index_asm_args), __L(__asm_args));      \
+        break;                                                                 \
+      case cudaBoundaryModeClamp:                                              \
+        asm volatile("sust.b." __asm_dim "." __asmtype                         \
+                     ".clamp [%0, " __index_op_args "], " __asm_op_args ";"    \
+                     :                                                         \
+                     : "l"(obj), __L(__index_asm_args), __L(__asm_args));      \
+        break;                                                                 \
+      case cudaBoundaryModeTrap:                                               \
+        asm volatile("sust.b." __asm_dim "." __asmtype                         \
+                     ".trap [%0, " __index_op_args "], " __asm_op_args ";"     \
+                     :                                                         \
+                     : "l"(obj), __L(__index_asm_args), __L(__asm_args));      \
+        break;                                                                 \
+      }                                                                        \
+    }                                                                          \
+  }
 
-#define __SURF_READ_V2(__op, __asm_dim, __asmtype, __type, __asm_op_args, __asm_args, __index_args, __index_asm_args) \
-    template <>                                                                                                       \
-    struct __surf_read_write_v2<__op, __type> {                                                                       \
-        static __device__ void __run(__type *__ptr, cudaSurfaceObject_t obj, __L(__index_args),                       \
-                                     cudaSurfaceBoundaryMode mode) {                                                  \
-            switch (mode) {                                                                                           \
-                case cudaBoundaryModeZero:                                                                            \
-                    asm("suld.b." __asm_dim "." __asmtype ".zero " __asm_op_args ";"                                  \
-                        : __L(__asm_args)                                                                             \
-                        : "l"(obj), __L(__index_asm_args));                                                           \
-                    break;                                                                                            \
-                case cudaBoundaryModeClamp:                                                                           \
-                    asm("suld.b." __asm_dim "." __asmtype ".clamp " __asm_op_args ";"                                 \
-                        : __L(__asm_args)                                                                             \
-                        : "l"(obj), __L(__index_asm_args));                                                           \
-                    break;                                                                                            \
-                case cudaBoundaryModeTrap:                                                                            \
-                    asm("suld.b." __asm_dim "." __asmtype ".trap " __asm_op_args ";"                                  \
-                        : __L(__asm_args)                                                                             \
-                        : "l"(obj), __L(__index_asm_args));                                                           \
-                    break;                                                                                            \
-            }                                                                                                         \
-        }                                                                                                             \
-    }
+#define __SURF_READ_V2(__op, __asm_dim, __asmtype, __type, __asm_op_args,      \
+                       __asm_args, __index_args, __index_asm_args)             \
+  template <> struct __surf_read_write_v2<__op, __type> {                      \
+    static __device__ void __run(__type *__ptr, cudaSurfaceObject_t obj,       \
+                                 __L(__index_args),                            \
+                                 cudaSurfaceBoundaryMode mode) {               \
+      switch (mode) {                                                          \
+      case cudaBoundaryModeZero:                                               \
+        asm("suld.b." __asm_dim "." __asmtype ".zero " __asm_op_args ";"       \
+            : __L(__asm_args)                                                  \
+            : "l"(obj), __L(__index_asm_args));                                \
+        break;                                                                 \
+      case cudaBoundaryModeClamp:                                              \
+        asm("suld.b." __asm_dim "." __asmtype ".clamp " __asm_op_args ";"      \
+            : __L(__asm_args)                                                  \
+            : "l"(obj), __L(__index_asm_args));                                \
+        break;                                                                 \
+      case cudaBoundaryModeTrap:                                               \
+        asm("suld.b." __asm_dim "." __asmtype ".trap " __asm_op_args ";"       \
+            : __L(__asm_args)                                                  \
+            : "l"(obj), __L(__index_asm_args));                                \
+        break;                                                                 \
+      }                                                                        \
+    }                                                                          \
+  }
 
-// Amazing, the read side should follow the same flow, I just need to change the generated assembly calls, and the rest
-// should fall in line.
+// The read side follows the same flow as the write side; only the generated
+// assembly instructions (suld instead of sust) differ.
 
 #define __SW_ASM_ARGS(__type) (__type(*__ptr))
 #define __SW_ASM_ARGS1(__type) (__type(__ptr->x))
 #define __SW_ASM_ARGS2(__type) (__type(__ptr->x), __type(__ptr->y))
-#define __SW_ASM_ARGS4(__type) (__type(__ptr->x), __type(__ptr->y), __type(__ptr->z), __type(__ptr->w))
-
-#define __SURF_READ1D(__asmtype, __type, __asm_op_args, __asm_args) \
-    __SURF_READ_V2(__ID("__isurf1Dread"), "1d", __asmtype, __type, __asm_op_args, __asm_args, (int x), ("r"(x)))
-#define __SURF_READ2D(__asmtype, __type, __asm_op_args, __asm_args)                                           \
-    __SURF_READ_V2(__ID("__isurf2Dread"), "2d", __asmtype, __type, __asm_op_args, __asm_args, (int x, int y), \
-                   ("r"(x), "r"(y)))
-#define __SURF_READ3D(__asmtype, __type, __asm_op_args, __asm_args)                                                  \
-    __SURF_READ_V2(__ID("__isurf3Dread"), "3d", __asmtype, __type, __asm_op_args, __asm_args, (int x, int y, int z), \
-                   ("r"(x), "r"(y), "r"(z)))
-
-#define __SURF_READ1DLAYERED(__asmtype, __type, __asm_op_args, __asm_args)                            \
-    __SURF_READ_V2(__ID("__isurf1DLayeredread"), "a1d", __asmtype, __type, __asm_op_args, __asm_args, \
-                   (int x, int layer), ("r"(x), "r"(layer)))
-#define __SURF_READ2DLAYERED(__asmtype, __type, __asm_op_args, __asm_args)                            \
-    __SURF_READ_V2(__ID("__isurf2DLayeredread"), "a2d", __asmtype, __type, __asm_op_args, __asm_args, \
-                   (int x, int y, int layer), ("r"(x), "r"(y), "r"(layer)))
-#define __SURF_READCUBEMAP(__asmtype, __type, __asm_op_args, __asm_args)                            \
-    __SURF_READ_V2(__ID("__isurfCubemapread"), "a2d", __asmtype, __type, __asm_op_args, __asm_args, \
-                   (int x, int y, int face), ("r"(x), "r"(y), "r"(face)))
-#define __SURF_READCUBEMAPLAYERED(__asmtype, __type, __asm_op_args, __asm_args)                            \
-    __SURF_READ_V2(__ID("__isurfCubemapLayeredread"), "a2d", __asmtype, __type, __asm_op_args, __asm_args, \
-                   (int x, int y, int layerface), ("r"(x), "r"(y), "r"(layerface)))
+#define __SW_ASM_ARGS4(__type)                                                 \
+  (__type(__ptr->x), __type(__ptr->y), __type(__ptr->z), __type(__ptr->w))
+
+#define __SURF_READ1D(__asmtype, __type, __asm_op_args, __asm_args)            \
+  __SURF_READ_V2(__ID("__isurf1Dread"), "1d", __asmtype, __type,               \
+                 __asm_op_args, __asm_args, (int x), ("r"(x)))
+#define __SURF_READ2D(__asmtype, __type, __asm_op_args, __asm_args)            \
+  __SURF_READ_V2(__ID("__isurf2Dread"), "2d", __asmtype, __type,               \
+                 __asm_op_args, __asm_args, (int x, int y), ("r"(x), "r"(y)))
+#define __SURF_READ3D(__asmtype, __type, __asm_op_args, __asm_args)            \
+  __SURF_READ_V2(__ID("__isurf3Dread"), "3d", __asmtype, __type,               \
+                 __asm_op_args, __asm_args, (int x, int y, int z),             \
+                 ("r"(x), "r"(y), "r"(z)))
+
+#define __SURF_READ1DLAYERED(__asmtype, __type, __asm_op_args, __asm_args)     \
+  __SURF_READ_V2(__ID("__isurf1DLayeredread"), "a1d", __asmtype, __type,       \
+                 __asm_op_args, __asm_args, (int x, int layer),                \
+                 ("r"(x), "r"(layer)))
+#define __SURF_READ2DLAYERED(__asmtype, __type, __asm_op_args, __asm_args)     \
+  __SURF_READ_V2(__ID("__isurf2DLayeredread"), "a2d", __asmtype, __type,       \
+                 __asm_op_args, __asm_args, (int x, int y, int layer),         \
+                 ("r"(x), "r"(y), "r"(layer)))
+#define __SURF_READCUBEMAP(__asmtype, __type, __asm_op_args, __asm_args)       \
+  __SURF_READ_V2(__ID("__isurfCubemapread"), "a2d", __asmtype, __type,         \
+                 __asm_op_args, __asm_args, (int x, int y, int face),          \
+                 ("r"(x), "r"(y), "r"(face)))
+#define __SURF_READCUBEMAPLAYERED(__asmtype, __type, __asm_op_args,            \
+                                  __asm_args)                                  \
+  __SURF_READ_V2(__ID("__isurfCubemapLayeredread"), "a2d", __asmtype, __type,  \
+                 __asm_op_args, __asm_args, (int x, int y, int layerface),     \
+                 ("r"(x), "r"(y), "r"(layerface)))
 
 #define __1DV1 "{%0}, [%1, {%2}]"
 #define __1DV2 "{%0, %1}, [%2, {%3}]"
@@ -863,44 +872,44 @@ struct __surf_read_write_v2;
 #define __CUBEMAPLAYERV2 "{%0, %1}, [%2, {%5, %3, %4, %4}]"
 #define __CUBEMAPLAYERV4 "{%0, %1, %2, %3}, [%4, {%7, %5, %6, %6}]"
 
-#define __SURF_READXD_ALL(__xdv1, __xdv2, __xdv4, __surf_readxd_v2)           \
-    __surf_readxd_v2("b8", char, __xdv1, __SW_ASM_ARGS("=h"));                \
-    __surf_readxd_v2("b8", signed char, __xdv1, __SW_ASM_ARGS("=h"));         \
-    __surf_readxd_v2("b8", char1, __xdv1, __SW_ASM_ARGS1("=h"));              \
-    __surf_readxd_v2("b8", unsigned char, __xdv1, __SW_ASM_ARGS("=h"));       \
-    __surf_readxd_v2("b8", uchar1, __xdv1, __SW_ASM_ARGS1("=h"));             \
-    __surf_readxd_v2("b16", short, __xdv1, __SW_ASM_ARGS("=h"));              \
-    __surf_readxd_v2("b16", short1, __xdv1, __SW_ASM_ARGS1("=h"));            \
-    __surf_readxd_v2("b16", unsigned short, __xdv1, __SW_ASM_ARGS("=h"));     \
-    __surf_readxd_v2("b16", ushort1, __xdv1, __SW_ASM_ARGS1("=h"));           \
-    __surf_readxd_v2("b32", int, __xdv1, __SW_ASM_ARGS("=r"));                \
-    __surf_readxd_v2("b32", int1, __xdv1, __SW_ASM_ARGS1("=r"));              \
-    __surf_readxd_v2("b32", unsigned int, __xdv1, __SW_ASM_ARGS("=r"));       \
-    __surf_readxd_v2("b32", uint1, __xdv1, __SW_ASM_ARGS1("=r"));             \
-    __surf_readxd_v2("b64", long long, __xdv1, __SW_ASM_ARGS("=l"));          \
-    __surf_readxd_v2("b64", longlong1, __xdv1, __SW_ASM_ARGS1("=l"));         \
-    __surf_readxd_v2("b64", unsigned long long, __xdv1, __SW_ASM_ARGS("=l")); \
-    __surf_readxd_v2("b64", ulonglong1, __xdv1, __SW_ASM_ARGS1("=l"));        \
-    __surf_readxd_v2("b32", float, __xdv1, __SW_ASM_ARGS("=r"));              \
-    __surf_readxd_v2("b32", float1, __xdv1, __SW_ASM_ARGS1("=r"));            \
-                                                                              \
-    __surf_readxd_v2("v2.b8", char2, __xdv2, __SW_ASM_ARGS2("=h"));           \
-    __surf_readxd_v2("v2.b8", uchar2, __xdv2, __SW_ASM_ARGS2("=h"));          \
-    __surf_readxd_v2("v2.b16", short2, __xdv2, __SW_ASM_ARGS2("=h"));         \
-    __surf_readxd_v2("v2.b16", ushort2, __xdv2, __SW_ASM_ARGS2("=h"));        \
-    __surf_readxd_v2("v2.b32", int2, __xdv2, __SW_ASM_ARGS2("=r"));           \
-    __surf_readxd_v2("v2.b32", uint2, __xdv2, __SW_ASM_ARGS2("=r"));          \
-    __surf_readxd_v2("v2.b64", longlong2, __xdv2, __SW_ASM_ARGS2("=l"));      \
-    __surf_readxd_v2("v2.b64", ulonglong2, __xdv2, __SW_ASM_ARGS2("=l"));     \
-    __surf_readxd_v2("v2.b32", float2, __xdv2, __SW_ASM_ARGS2("=r"));         \
-                                                                              \
-    __surf_readxd_v2("v4.b8", char4, __xdv4, __SW_ASM_ARGS4("=h"));           \
-    __surf_readxd_v2("v4.b8", uchar4, __xdv4, __SW_ASM_ARGS4("=h"));          \
-    __surf_readxd_v2("v4.b16", short4, __xdv4, __SW_ASM_ARGS4("=h"));         \
-    __surf_readxd_v2("v4.b16", ushort4, __xdv4, __SW_ASM_ARGS4("=h"));        \
-    __surf_readxd_v2("v4.b32", int4, __xdv4, __SW_ASM_ARGS4("=r"));           \
-    __surf_readxd_v2("v4.b32", uint4, __xdv4, __SW_ASM_ARGS4("=r"));          \
-    __surf_readxd_v2("v4.b32", float4, __xdv4, __SW_ASM_ARGS4("=r"))
+#define __SURF_READXD_ALL(__xdv1, __xdv2, __xdv4, __surf_readxd_v2)            \
+  __surf_readxd_v2("b8", char, __xdv1, __SW_ASM_ARGS("=h"));                   \
+  __surf_readxd_v2("b8", signed char, __xdv1, __SW_ASM_ARGS("=h"));            \
+  __surf_readxd_v2("b8", char1, __xdv1, __SW_ASM_ARGS1("=h"));                 \
+  __surf_readxd_v2("b8", unsigned char, __xdv1, __SW_ASM_ARGS("=h"));          \
+  __surf_readxd_v2("b8", uchar1, __xdv1, __SW_ASM_ARGS1("=h"));                \
+  __surf_readxd_v2("b16", short, __xdv1, __SW_ASM_ARGS("=h"));                 \
+  __surf_readxd_v2("b16", short1, __xdv1, __SW_ASM_ARGS1("=h"));               \
+  __surf_readxd_v2("b16", unsigned short, __xdv1, __SW_ASM_ARGS("=h"));        \
+  __surf_readxd_v2("b16", ushort1, __xdv1, __SW_ASM_ARGS1("=h"));              \
+  __surf_readxd_v2("b32", int, __xdv1, __SW_ASM_ARGS("=r"));                   \
+  __surf_readxd_v2("b32", int1, __xdv1, __SW_ASM_ARGS1("=r"));                 \
+  __surf_readxd_v2("b32", unsigned int, __xdv1, __SW_ASM_ARGS("=r"));          \
+  __surf_readxd_v2("b32", uint1, __xdv1, __SW_ASM_ARGS1("=r"));                \
+  __surf_readxd_v2("b64", long long, __xdv1, __SW_ASM_ARGS("=l"));             \
+  __surf_readxd_v2("b64", longlong1, __xdv1, __SW_ASM_ARGS1("=l"));            \
+  __surf_readxd_v2("b64", unsigned long long, __xdv1, __SW_ASM_ARGS("=l"));    \
+  __surf_readxd_v2("b64", ulonglong1, __xdv1, __SW_ASM_ARGS1("=l"));           \
+  __surf_readxd_v2("b32", float, __xdv1, __SW_ASM_ARGS("=r"));                 \
+  __surf_readxd_v2("b32", float1, __xdv1, __SW_ASM_ARGS1("=r"));               \
+                                                                               \
+  __surf_readxd_v2("v2.b8", char2, __xdv2, __SW_ASM_ARGS2("=h"));              \
+  __surf_readxd_v2("v2.b8", uchar2, __xdv2, __SW_ASM_ARGS2("=h"));             \
+  __surf_readxd_v2("v2.b16", short2, __xdv2, __SW_ASM_ARGS2("=h"));            \
+  __surf_readxd_v2("v2.b16", ushort2, __xdv2, __SW_ASM_ARGS2("=h"));           \
+  __surf_readxd_v2("v2.b32", int2, __xdv2, __SW_ASM_ARGS2("=r"));              \
+  __surf_readxd_v2("v2.b32", uint2, __xdv2, __SW_ASM_ARGS2("=r"));             \
+  __surf_readxd_v2("v2.b64", longlong2, __xdv2, __SW_ASM_ARGS2("=l"));         \
+  __surf_readxd_v2("v2.b64", ulonglong2, __xdv2, __SW_ASM_ARGS2("=l"));        \
+  __surf_readxd_v2("v2.b32", float2, __xdv2, __SW_ASM_ARGS2("=r"));            \
+                                                                               \
+  __surf_readxd_v2("v4.b8", char4, __xdv4, __SW_ASM_ARGS4("=h"));              \
+  __surf_readxd_v2("v4.b8", uchar4, __xdv4, __SW_ASM_ARGS4("=h"));             \
+  __surf_readxd_v2("v4.b16", short4, __xdv4, __SW_ASM_ARGS4("=h"));            \
+  __surf_readxd_v2("v4.b16", ushort4, __xdv4, __SW_ASM_ARGS4("=h"));           \
+  __surf_readxd_v2("v4.b32", int4, __xdv4, __SW_ASM_ARGS4("=r"));              \
+  __surf_readxd_v2("v4.b32", uint4, __xdv4, __SW_ASM_ARGS4("=r"));             \
+  __surf_readxd_v2("v4.b32", float4, __xdv4, __SW_ASM_ARGS4("=r"))
 
 __SURF_READXD_ALL(__1DV1, __1DV2, __1DV4, __SURF_READ1D);
 __SURF_READXD_ALL(__2DV1, __2DV2, __2DV4, __SURF_READ2D);
@@ -908,70 +917,77 @@ __SURF_READXD_ALL(__3DV1, __3DV2, __3DV4, __SURF_READ3D);
 __SURF_READXD_ALL(__1DLAYERV1, __1DLAYERV2, __1DLAYERV4, __SURF_READ1DLAYERED);
 __SURF_READXD_ALL(__2DLAYERV1, __2DLAYERV2, __2DLAYERV4, __SURF_READ2DLAYERED);
 __SURF_READXD_ALL(__CUBEMAPV1, __CUBEMAPV2, __CUBEMAPV4, __SURF_READCUBEMAP);
-__SURF_READXD_ALL(__CUBEMAPLAYERV1, __CUBEMAPLAYERV2, __CUBEMAPLAYERV4, __SURF_READCUBEMAPLAYERED);
-
-
-#define __SURF_WRITE1D_V2(__asmtype, __type, __asm_op_args, __asm_args)                                     \
-    __SURF_WRITE_V2(__ID("__isurf1Dwrite_v2"), "1d", __asmtype, __type, "{%1}", (int x), ("r"(x)), __asm_op_args, \
-                    __asm_args)
-#define __SURF_WRITE1DLAYERED_V2(__asmtype, __type, __asm_op_args, __asm_args)                                  \
-    __SURF_WRITE_V2(__ID("__isurf1DLayeredwrite_v2"), "a1d", __asmtype, __type, "{%2, %1}", (int x, int layer), \
-                    ("r"(x), "r"(layer)), __asm_op_args, __asm_args)
-#define __SURF_WRITE2D_V2(__asmtype, __type, __asm_op_args, __asm_args)                                               \
-    __SURF_WRITE_V2(__ID("__isurf2Dwrite_v2"), "2d", __asmtype, __type, "{%1, %2}", (int x, int y), ("r"(x), "r"(y)), \
-                    __asm_op_args, __asm_args)
-#define __SURF_WRITE2DLAYERED_V2(__asmtype, __type, __asm_op_args, __asm_args)                      \
-    __SURF_WRITE_V2(__ID("__isurf2DLayeredwrite_v2"), "a2d", __asmtype, __type, "{%3, %1, %2, %2}", \
-                    (int x, int y, int layer), ("r"(x), "r"(y), "r"(layer)), __asm_op_args, __asm_args)
-#define __SURF_WRITE3D_V2(__asmtype, __type, __asm_op_args, __asm_args)                                            \
-    __SURF_WRITE_V2(__ID("__isurf3Dwrite_v2"), "3d", __asmtype, __type, "{%1, %2, %3, %3}", (int x, int y, int z), \
-                    ("r"(x), "r"(y), "r"(z)), __asm_op_args, __asm_args)
-
-#define __SURF_CUBEMAPWRITE_V2(__asmtype, __type, __asm_op_args, __asm_args)                      \
-    __SURF_WRITE_V2(__ID("__isurfCubemapwrite_v2"), "a2d", __asmtype, __type, "{%3, %1, %2, %2}", \
-                    (int x, int y, int face), ("r"(x), "r"(y), "r"(face)), __asm_op_args, __asm_args)
-#define __SURF_CUBEMAPLAYEREDWRITE_V2(__asmtype, __type, __asm_op_args, __asm_args)                      \
-    __SURF_WRITE_V2(__ID("__isurfCubemapLayeredwrite_v2"), "a2d", __asmtype, __type, "{%3, %1, %2, %2}", \
-                    (int x, int y, int layerface), ("r"(x), "r"(y), "r"(layerface)), __asm_op_args, __asm_args)
-
-#define __SURF_WRITEXD_V2_ALL(__xdv1, __xdv2, __xdv4, __surf_writexd_v2)      \
-    __surf_writexd_v2("b8", char, __xdv1, __SW_ASM_ARGS("h"));                \
-    __surf_writexd_v2("b8", signed char, __xdv1, __SW_ASM_ARGS("h"));         \
-    __surf_writexd_v2("b8", char1, __xdv1, __SW_ASM_ARGS1("h"));              \
-    __surf_writexd_v2("b8", unsigned char, __xdv1, __SW_ASM_ARGS("h"));       \
-    __surf_writexd_v2("b8", uchar1, __xdv1, __SW_ASM_ARGS1("h"));             \
-    __surf_writexd_v2("b16", short, __xdv1, __SW_ASM_ARGS("h"));              \
-    __surf_writexd_v2("b16", short1, __xdv1, __SW_ASM_ARGS1("h"));            \
-    __surf_writexd_v2("b16", unsigned short, __xdv1, __SW_ASM_ARGS("h"));     \
-    __surf_writexd_v2("b16", ushort1, __xdv1, __SW_ASM_ARGS1("h"));           \
-    __surf_writexd_v2("b32", int, __xdv1, __SW_ASM_ARGS("r"));                \
-    __surf_writexd_v2("b32", int1, __xdv1, __SW_ASM_ARGS1("r"));              \
-    __surf_writexd_v2("b32", unsigned int, __xdv1, __SW_ASM_ARGS("r"));       \
-    __surf_writexd_v2("b32", uint1, __xdv1, __SW_ASM_ARGS1("r"));             \
-    __surf_writexd_v2("b64", long long, __xdv1, __SW_ASM_ARGS("l"));          \
-    __surf_writexd_v2("b64", longlong1, __xdv1, __SW_ASM_ARGS1("l"));         \
-    __surf_writexd_v2("b64", unsigned long long, __xdv1, __SW_ASM_ARGS("l")); \
-    __surf_writexd_v2("b64", ulonglong1, __xdv1, __SW_ASM_ARGS1("l"));        \
-    __surf_writexd_v2("b32", float, __xdv1, __SW_ASM_ARGS("r"));              \
-    __surf_writexd_v2("b32", float1, __xdv1, __SW_ASM_ARGS1("r"));            \
-                                                                              \
-    __surf_writexd_v2("v2.b8", char2, __xdv2, __SW_ASM_ARGS2("h"));           \
-    __surf_writexd_v2("v2.b8", uchar2, __xdv2, __SW_ASM_ARGS2("h"));          \
-    __surf_writexd_v2("v2.b16", short2, __xdv2, __SW_ASM_ARGS2("h"));         \
-    __surf_writexd_v2("v2.b16", ushort2, __xdv2, __SW_ASM_ARGS2("h"));        \
-    __surf_writexd_v2("v2.b32", int2, __xdv2, __SW_ASM_ARGS2("r"));           \
-    __surf_writexd_v2("v2.b32", uint2, __xdv2, __SW_ASM_ARGS2("r"));          \
-    __surf_writexd_v2("v2.b64", longlong2, __xdv2, __SW_ASM_ARGS2("l"));      \
-    __surf_writexd_v2("v2.b64", ulonglong2, __xdv2, __SW_ASM_ARGS2("l"));     \
-    __surf_writexd_v2("v2.b32", float2, __xdv2, __SW_ASM_ARGS2("r"));         \
-                                                                              \
-    __surf_writexd_v2("v4.b8", char4, __xdv4, __SW_ASM_ARGS4("h"));           \
-    __surf_writexd_v2("v4.b8", uchar4, __xdv4, __SW_ASM_ARGS4("h"));          \
-    __surf_writexd_v2("v4.b16", short4, __xdv4, __SW_ASM_ARGS4("h"));         \
-    __surf_writexd_v2("v4.b16", ushort4, __xdv4, __SW_ASM_ARGS4("h"));        \
-    __surf_writexd_v2("v4.b32", int4, __xdv4, __SW_ASM_ARGS4("r"));           \
-    __surf_writexd_v2("v4.b32", uint4, __xdv4, __SW_ASM_ARGS4("r"));          \
-    __surf_writexd_v2("v4.b32", float4, __xdv4, __SW_ASM_ARGS4("r"))
+__SURF_READXD_ALL(__CUBEMAPLAYERV1, __CUBEMAPLAYERV2, __CUBEMAPLAYERV4,
+                  __SURF_READCUBEMAPLAYERED);
+
+#define __SURF_WRITE1D_V2(__asmtype, __type, __asm_op_args, __asm_args)        \
+  __SURF_WRITE_V2(__ID("__isurf1Dwrite_v2"), "1d", __asmtype, __type, "{%1}",  \
+                  (int x), ("r"(x)), __asm_op_args, __asm_args)
+#define __SURF_WRITE1DLAYERED_V2(__asmtype, __type, __asm_op_args, __asm_args) \
+  __SURF_WRITE_V2(__ID("__isurf1DLayeredwrite_v2"), "a1d", __asmtype, __type,  \
+                  "{%2, %1}", (int x, int layer), ("r"(x), "r"(layer)),        \
+                  __asm_op_args, __asm_args)
+#define __SURF_WRITE2D_V2(__asmtype, __type, __asm_op_args, __asm_args)        \
+  __SURF_WRITE_V2(__ID("__isurf2Dwrite_v2"), "2d", __asmtype, __type,          \
+                  "{%1, %2}", (int x, int y), ("r"(x), "r"(y)), __asm_op_args, \
+                  __asm_args)
+#define __SURF_WRITE2DLAYERED_V2(__asmtype, __type, __asm_op_args, __asm_args) \
+  __SURF_WRITE_V2(__ID("__isurf2DLayeredwrite_v2"), "a2d", __asmtype, __type,  \
+                  "{%3, %1, %2, %2}", (int x, int y, int layer),               \
+                  ("r"(x), "r"(y), "r"(layer)), __asm_op_args, __asm_args)
+#define __SURF_WRITE3D_V2(__asmtype, __type, __asm_op_args, __asm_args)        \
+  __SURF_WRITE_V2(__ID("__isurf3Dwrite_v2"), "3d", __asmtype, __type,          \
+                  "{%1, %2, %3, %3}", (int x, int y, int z),                   \
+                  ("r"(x), "r"(y), "r"(z)), __asm_op_args, __asm_args)
+
+#define __SURF_CUBEMAPWRITE_V2(__asmtype, __type, __asm_op_args, __asm_args)   \
+  __SURF_WRITE_V2(__ID("__isurfCubemapwrite_v2"), "a2d", __asmtype, __type,    \
+                  "{%3, %1, %2, %2}", (int x, int y, int face),                \
+                  ("r"(x), "r"(y), "r"(face)), __asm_op_args, __asm_args)
+#define __SURF_CUBEMAPLAYEREDWRITE_V2(__asmtype, __type, __asm_op_args,        \
+                                      __asm_args)                              \
+  __SURF_WRITE_V2(__ID("__isurfCubemapLayeredwrite_v2"), "a2d", __asmtype,     \
+                  __type, "{%3, %1, %2, %2}", (int x, int y, int layerface),   \
+                  ("r"(x), "r"(y), "r"(layerface)), __asm_op_args, __asm_args)
+
+#define __SURF_WRITEXD_V2_ALL(__xdv1, __xdv2, __xdv4, __surf_writexd_v2)       \
+  __surf_writexd_v2("b8", char, __xdv1, __SW_ASM_ARGS("h"));                   \
+  __surf_writexd_v2("b8", signed char, __xdv1, __SW_ASM_ARGS("h"));            \
+  __surf_writexd_v2("b8", char1, __xdv1, __SW_ASM_ARGS1("h"));                 \
+  __surf_writexd_v2("b8", unsigned char, __xdv1, __SW_ASM_ARGS("h"));          \
+  __surf_writexd_v2("b8", uchar1, __xdv1, __SW_ASM_ARGS1("h"));                \
+  __surf_writexd_v2("b16", short, __xdv1, __SW_ASM_ARGS("h"));                 \
+  __surf_writexd_v2("b16", short1, __xdv1, __SW_ASM_ARGS1("h"));               \
+  __surf_writexd_v2("b16", unsigned short, __xdv1, __SW_ASM_ARGS("h"));        \
+  __surf_writexd_v2("b16", ushort1, __xdv1, __SW_ASM_ARGS1("h"));              \
+  __surf_writexd_v2("b32", int, __xdv1, __SW_ASM_ARGS("r"));                   \
+  __surf_writexd_v2("b32", int1, __xdv1, __SW_ASM_ARGS1("r"));                 \
+  __surf_writexd_v2("b32", unsigned int, __xdv1, __SW_ASM_ARGS("r"));          \
+  __surf_writexd_v2("b32", uint1, __xdv1, __SW_ASM_ARGS1("r"));                \
+  __surf_writexd_v2("b64", long long, __xdv1, __SW_ASM_ARGS("l"));             \
+  __surf_writexd_v2("b64", longlong1, __xdv1, __SW_ASM_ARGS1("l"));            \
+  __surf_writexd_v2("b64", unsigned long long, __xdv1, __SW_ASM_ARGS("l"));    \
+  __surf_writexd_v2("b64", ulonglong1, __xdv1, __SW_ASM_ARGS1("l"));           \
+  __surf_writexd_v2("b32", float, __xdv1, __SW_ASM_ARGS("r"));                 \
+  __surf_writexd_v2("b32", float1, __xdv1, __SW_ASM_ARGS1("r"));               \
+                                                                               \
+  __surf_writexd_v2("v2.b8", char2, __xdv2, __SW_ASM_ARGS2("h"));              \
+  __surf_writexd_v2("v2.b8", uchar2, __xdv2, __SW_ASM_ARGS2("h"));             \
+  __surf_writexd_v2("v2.b16", short2, __xdv2, __SW_ASM_ARGS2("h"));            \
+  __surf_writexd_v2("v2.b16", ushort2, __xdv2, __SW_ASM_ARGS2("h"));           \
+  __surf_writexd_v2("v2.b32", int2, __xdv2, __SW_ASM_ARGS2("r"));              \
+  __surf_writexd_v2("v2.b32", uint2, __xdv2, __SW_ASM_ARGS2("r"));             \
+  __surf_writexd_v2("v2.b64", longlong2, __xdv2, __SW_ASM_ARGS2("l"));         \
+  __surf_writexd_v2("v2.b64", ulonglong2, __xdv2, __SW_ASM_ARGS2("l"));        \
+  __surf_writexd_v2("v2.b32", float2, __xdv2, __SW_ASM_ARGS2("r"));            \
+                                                                               \
+  __surf_writexd_v2("v4.b8", char4, __xdv4, __SW_ASM_ARGS4("h"));              \
+  __surf_writexd_v2("v4.b8", uchar4, __xdv4, __SW_ASM_ARGS4("h"));             \
+  __surf_writexd_v2("v4.b16", short4, __xdv4, __SW_ASM_ARGS4("h"));            \
+  __surf_writexd_v2("v4.b16", ushort4, __xdv4, __SW_ASM_ARGS4("h"));           \
+  __surf_writexd_v2("v4.b32", int4, __xdv4, __SW_ASM_ARGS4("r"));              \
+  __surf_writexd_v2("v4.b32", uint4, __xdv4, __SW_ASM_ARGS4("r"));             \
+  __surf_writexd_v2("v4.b32", float4, __xdv4, __SW_ASM_ARGS4("r"))
 
 #define __1DV1 "{%2}"
 #define __1DV2 "{%2, %3}"
@@ -994,9 +1010,10 @@ __SURF_WRITEXD_V2_ALL(__3DV1, __3DV2, __3DV4, __SURF_CUBEMAPWRITE_V2);
 __SURF_WRITEXD_V2_ALL(__3DV1, __3DV2, __3DV4, __SURF_CUBEMAPLAYEREDWRITE_V2);
 
 template <class __op, class __DataT, class... __Args>
-__device__ static void __tex_fetch_impl(__surface_op_tag, __DataT *__ptr, cudaSurfaceObject_t __handle,
+__device__ static void __tex_fetch_impl(__surface_op_tag, __DataT *__ptr,
+                                        cudaSurfaceObject_t __handle,
                                         __Args... __args) {
-    __surf_read_write_v2<__op, __DataT>::__run(__ptr, __handle, __args...);
+  __surf_read_write_v2<__op, __DataT>::__run(__ptr, __handle, __args...);
 }
 
 // These are the top-level function overloads the __nv_tex_surf_handler expands
@@ -1009,7 +1026,8 @@ __device__ static void __tex_fetch_impl(__surface_op_tag, __DataT *__ptr, cudaSu
 // __nv_tex_surf_handler("__tex...", &ret, cudaTextureObject_t handle, args...);
 //   Data type and return type are based on ret.
 template <class __op, class __T, class... __Args>
-__device__ static void __tex_fetch_impl(__texture_op_tag, __T *__ptr, cudaTextureObject_t __handle,
+__device__ static void __tex_fetch_impl(__texture_op_tag, __T *__ptr,
+                                        cudaTextureObject_t __handle,
                                         __Args... __args) {
   using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
   *__ptr = __convert<__T, __FetchT>::__run(

>From 4dde1b1b446e2e4448ba902dcde556751838f410 Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.linux at gmail.com>
Date: Sat, 29 Mar 2025 19:31:26 -0700
Subject: [PATCH 7/7] Clang format try 2

---
 clang/lib/Headers/__clang_cuda_texture_intrinsics.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
index db682f0df43a1..8b914ed50b5fc 100644
--- a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
@@ -255,7 +255,9 @@ template <class __op> struct __op_type_traits {
 
 // Specialize for known surface operation tags
 #define __OP_TYPE_SURFACE(__op)                                                \
-  template <> struct __op_type_traits<__op> { using type = __surface_op_tag; };
+  template <> struct __op_type_traits<__op> {                                  \
+    using type = __surface_op_tag;                                             \
+  }
 
 // Classes that implement specific texture ops.
 template <class __op> struct __tex_fetch_v4;