[clang] Add missing intrinsics to cuda headers (PR #143664)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 12 11:10:43 PDT 2025
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff HEAD~1 HEAD --extensions h -- clang/lib/Headers/__clang_cuda_intrinsics.h
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index cf3f2ceba..9c288d4d3 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -479,40 +479,40 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
return ret;
}
-#define INTRINSIC_LOAD(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ decl_type func_name(const decl_type *ptr) { \
- internal_type ret; \
- asm(asm_op" %0, [%1];" : asm_type(ret) : "l"(ptr)); \
- return (decl_type)ret; \
-}
+#define INTRINSIC_LOAD(func_name, asm_op, decl_type, internal_type, asm_type) \
+ inline __device__ decl_type func_name(const decl_type *ptr) { \
+ internal_type ret; \
+ asm(asm_op " %0, [%1];" : asm_type(ret) : "l"(ptr)); \
+ return (decl_type)ret; \
+ }
#define INTRINSIC_LOAD2(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ decl_type func_name(const decl_type *ptr) { \
- decl_type ret; \
- internal_type tmp; \
- asm(asm_op" {%0,%1}, [%2];" \
- : asm_type(tmp.x), asm_type(tmp.y) \
- : "l"(ptr)); \
- using element_type = decltype(ret.x); \
- ret.x = (element_type)(tmp.x); \
- ret.y = (element_type)tmp.y; \
- return ret; \
-}
+ inline __device__ decl_type func_name(const decl_type *ptr) { \
+ decl_type ret; \
+ internal_type tmp; \
+ asm(asm_op " {%0,%1}, [%2];" \
+ : asm_type(tmp.x), asm_type(tmp.y) \
+ : "l"(ptr)); \
+ using element_type = decltype(ret.x); \
+ ret.x = (element_type)(tmp.x); \
+ ret.y = (element_type)tmp.y; \
+ return ret; \
+ }
#define INTRINSIC_LOAD4(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ decl_type func_name(const decl_type *ptr) { \
- decl_type ret; \
- internal_type tmp; \
- asm(asm_op" {%0,%1,%2,%3}, [%4];" \
- : asm_type(tmp.x), asm_type(tmp.y), asm_type(tmp.z), asm_type(tmp.w) \
- : "l"(ptr)); \
- using element_type = decltype(ret.x); \
- ret.x = (element_type)tmp.x; \
- ret.y = (element_type)tmp.y; \
- ret.z = (element_type)tmp.z; \
- ret.w = (element_type)tmp.w; \
- return ret; \
-}
+ inline __device__ decl_type func_name(const decl_type *ptr) { \
+ decl_type ret; \
+ internal_type tmp; \
+ asm(asm_op " {%0,%1,%2,%3}, [%4];" \
+ : asm_type(tmp.x), asm_type(tmp.y), asm_type(tmp.z), asm_type(tmp.w) \
+ : "l"(ptr)); \
+ using element_type = decltype(ret.x); \
+ ret.x = (element_type)tmp.x; \
+ ret.y = (element_type)tmp.y; \
+ ret.z = (element_type)tmp.z; \
+ ret.w = (element_type)tmp.w; \
+ return ret; \
+ }
INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", char, unsigned int, "=r");
INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", signed char, unsigned int, "=r");
@@ -529,9 +529,11 @@ INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s32", int4, int4, "=r");
INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s64 ", longlong2, longlong2, "=l");
INTRINSIC_LOAD(__ldcg, "ld.global.cg.u8", unsigned char, unsigned int, "=r");
-INTRINSIC_LOAD(__ldcg, "ld.global.cg.u16", unsigned short, unsigned short, "=h");
+INTRINSIC_LOAD(__ldcg, "ld.global.cg.u16", unsigned short, unsigned short,
+ "=h");
INTRINSIC_LOAD(__ldcg, "ld.global.cg.u32", unsigned int, unsigned int, "=r");
-INTRINSIC_LOAD(__ldcg, "ld.global.cg.u64", unsigned long long, unsigned long long, "=l");
+INTRINSIC_LOAD(__ldcg, "ld.global.cg.u64", unsigned long long,
+ unsigned long long, "=l");
INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u8", uchar2, int2, "=r");
INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u8", uchar4, int4, "=r");
@@ -558,39 +560,43 @@ inline __device__ long __ldcg(const long *ptr) {
}
#define MINTRINSIC_LOAD(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ decl_type func_name(const decl_type *ptr) { \
- internal_type ret; \
- asm(asm_op" %0, [%1];" : asm_type(ret) : "l"(ptr) : "memory"); \
- return (decl_type)ret; \
-}
-
-#define MINTRINSIC_LOAD2(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ decl_type func_name(const decl_type *ptr) { \
- decl_type ret; \
- internal_type tmp; \
- asm(asm_op" {%0,%1}, [%2];" \
- : asm_type(tmp.x), asm_type(tmp.y) \
- : "l"(ptr) : "memory"); \
- using element_type = decltype(ret.x); \
- ret.x = (element_type)tmp.x; \
- ret.y = (element_type)tmp.y; \
- return ret; \
-}
-
-#define MINTRINSIC_LOAD4(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ decl_type func_name(const decl_type *ptr) { \
- decl_type ret; \
- internal_type tmp; \
- asm(asm_op" {%0,%1,%2,%3}, [%4];" \
- : asm_type(tmp.x), asm_type(tmp.y), asm_type(tmp.z), asm_type(tmp.w) \
- : "l"(ptr) : "memory"); \
- using element_type = decltype(ret.x); \
- ret.x = (element_type)tmp.x; \
- ret.y = (element_type)tmp.y; \
- ret.z = (element_type)tmp.z; \
- ret.w = (element_type)tmp.w; \
- return ret; \
-}
+ inline __device__ decl_type func_name(const decl_type *ptr) { \
+ internal_type ret; \
+ asm(asm_op " %0, [%1];" : asm_type(ret) : "l"(ptr) : "memory"); \
+ return (decl_type)ret; \
+ }
+
+#define MINTRINSIC_LOAD2(func_name, asm_op, decl_type, internal_type, \
+ asm_type) \
+ inline __device__ decl_type func_name(const decl_type *ptr) { \
+ decl_type ret; \
+ internal_type tmp; \
+ asm(asm_op " {%0,%1}, [%2];" \
+ : asm_type(tmp.x), asm_type(tmp.y) \
+ : "l"(ptr) \
+ : "memory"); \
+ using element_type = decltype(ret.x); \
+ ret.x = (element_type)tmp.x; \
+ ret.y = (element_type)tmp.y; \
+ return ret; \
+ }
+
+#define MINTRINSIC_LOAD4(func_name, asm_op, decl_type, internal_type, \
+ asm_type) \
+ inline __device__ decl_type func_name(const decl_type *ptr) { \
+ decl_type ret; \
+ internal_type tmp; \
+ asm(asm_op " {%0,%1,%2,%3}, [%4];" \
+ : asm_type(tmp.x), asm_type(tmp.y), asm_type(tmp.z), asm_type(tmp.w) \
+ : "l"(ptr) \
+ : "memory"); \
+ using element_type = decltype(ret.x); \
+ ret.x = (element_type)tmp.x; \
+ ret.y = (element_type)tmp.y; \
+ ret.z = (element_type)tmp.z; \
+ ret.w = (element_type)tmp.w; \
+ return ret; \
+ }
MINTRINSIC_LOAD(__ldcv, "ld.global.cv.u8", unsigned char, unsigned int, "=r");
MINTRINSIC_LOAD(__ldcv, "ld.global.cv.u16", unsigned short, unsigned short,
@@ -685,33 +691,35 @@ inline __device__ long __ldcs(const long *ptr) {
}
#define INTRINSIC_STORE(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ void func_name(decl_type *ptr, decl_type value) { \
- internal_type tmp = (internal_type)value; \
- asm(asm_op" [%0], %1;" ::"l"(ptr), asm_type(tmp) : "memory"); \
-}
-
-#define INTRINSIC_STORE2(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ void func_name(decl_type *ptr, decl_type value) { \
- internal_type tmp; \
- using element_type = decltype(tmp.x); \
- tmp.x = (element_type)(value.x); \
- tmp.y = (element_type)(value.y); \
- asm(asm_op" [%0], {%1,%2};" ::"l"(ptr), asm_type(tmp.x), asm_type(tmp.y) \
- : "memory"); \
-}
-
-#define INTRINSIC_STORE4(func_name, asm_op, decl_type, internal_type, asm_type) \
-inline __device__ void func_name(decl_type *ptr, decl_type value) { \
- internal_type tmp; \
- using element_type = decltype(tmp.x); \
- tmp.x = (element_type)(value.x); \
- tmp.y = (element_type)(value.y); \
- tmp.z = (element_type)(value.z); \
- tmp.w = (element_type)(value.w); \
- asm(asm_op" [%0], {%1,%2,%3,%4};" ::"l"(ptr), asm_type(tmp.x), \
- asm_type(tmp.y), asm_type(tmp.z), asm_type(tmp.w) \
- : "memory"); \
-}
+ inline __device__ void func_name(decl_type *ptr, decl_type value) { \
+ internal_type tmp = (internal_type)value; \
+ asm(asm_op " [%0], %1;" ::"l"(ptr), asm_type(tmp) : "memory"); \
+ }
+
+#define INTRINSIC_STORE2(func_name, asm_op, decl_type, internal_type, \
+ asm_type) \
+ inline __device__ void func_name(decl_type *ptr, decl_type value) { \
+ internal_type tmp; \
+ using element_type = decltype(tmp.x); \
+ tmp.x = (element_type)(value.x); \
+ tmp.y = (element_type)(value.y); \
+ asm(asm_op " [%0], {%1,%2};" ::"l"(ptr), asm_type(tmp.x), asm_type(tmp.y) \
+ : "memory"); \
+ }
+
+#define INTRINSIC_STORE4(func_name, asm_op, decl_type, internal_type, \
+ asm_type) \
+ inline __device__ void func_name(decl_type *ptr, decl_type value) { \
+ internal_type tmp; \
+ using element_type = decltype(tmp.x); \
+ tmp.x = (element_type)(value.x); \
+ tmp.y = (element_type)(value.y); \
+ tmp.z = (element_type)(value.z); \
+ tmp.w = (element_type)(value.w); \
+ asm(asm_op " [%0], {%1,%2,%3,%4};" ::"l"(ptr), asm_type(tmp.x), \
+ asm_type(tmp.y), asm_type(tmp.z), asm_type(tmp.w) \
+ : "memory"); \
+ }
INTRINSIC_STORE(__stwt, "st.global.wt.s8", char, int, "r");
INTRINSIC_STORE(__stwt, "st.global.wt.s8", signed char, int, "r");
``````````
</details>
https://github.com/llvm/llvm-project/pull/143664
More information about the cfe-commits
mailing list