[clang] 7e9802f - [Headers][NFC] Steps to allow sharing code between gpu intrin.h headers (#131134)

Thu Mar 13 06:17:55 PDT 2025

Author: Jon Chesterfield
Date: 2025-03-13T13:17:52Z
New Revision: 7e9802f348e36bf826d9fe83d0d187478e0e9639

URL: https://github.com/llvm/llvm-project/commit/7e9802f348e36bf826d9fe83d0d187478e0e9639
DIFF: https://github.com/llvm/llvm-project/commit/7e9802f348e36bf826d9fe83d0d187478e0e9639.diff

LOG: [Headers][NFC] Steps to allow sharing code between gpu intrin.h headers (#131134)

Adds macro guards that raise an error if the implementation headers are
included directly, as part of dropping the need for them to be standalone.
Lifts the bool macro into gpuintrin.h.

Moves shuffle_idx_u64 into gpuintrin.h in passing, as it was the same
implementation in each architecture file.

Added: 
    

Modified: 
    clang/lib/Headers/amdgpuintrin.h
    clang/lib/Headers/gpuintrin.h
    clang/lib/Headers/nvptxintrin.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h
index 839a05175cf3e..56748f6c3e818 100644

--- a/clang/lib/Headers/amdgpuintrin.h
+++ b/clang/lib/Headers/amdgpuintrin.h
@@ -13,11 +13,8 @@
 #error "This file is intended for AMDGPU targets or offloading to AMDGPU"
 #endif
 
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
+#ifndef __GPUINTRIN_H
+#error "Never use <amdgpuintrin.h> directly; include <gpuintrin.h> instead"
 #endif
 
 _Pragma("omp begin declare target device_type(nohost)");
@@ -146,17 +143,6 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x,
   return __builtin_amdgcn_ds_bpermute(__lane << 2, __x);
 }
 
-// Shuffles the the lanes inside the wavefront according to the given index.
-_DEFAULT_FN_ATTRS static __inline__ uint64_t
-__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
-                      uint32_t __width) {
-  uint32_t __hi = (uint32_t)(__x >> 32ull);
-  uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
-  return ((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __hi, __width)
-          << 32ull) |
-         ((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __lo, __width));
-}
-
 // Returns a bitmask marking all lanes that have the same value of __x.
 _DEFAULT_FN_ATTRS static __inline__ uint64_t
 __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
@@ -238,8 +224,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
 _Pragma("omp end declare variant");
 _Pragma("omp end declare target");
 
-#if !defined(__cplusplus)
-_Pragma("pop_macro(\"bool\")");
-#endif
-
 #endif // __AMDGPUINTRIN_H

diff  --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 4181628d18048..ac79d685337c5 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -25,6 +25,13 @@
 #endif
 #endif
 
+#include <stdint.h>
+
+#if !defined(__cplusplus)
+_Pragma("push_macro(\"bool\")");
+#define bool _Bool
+#endif
+
 #if defined(__NVPTX__)
 #include <nvptxintrin.h>
 #elif defined(__AMDGPU__)
@@ -33,13 +40,6 @@
 #error "This header is only meant to be used on GPU architectures."
 #endif
 
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
-#endif
-
 _Pragma("omp begin declare target device_type(nohost)");
 _Pragma("omp begin declare variant match(device = {kind(gpu)})");
 
@@ -141,6 +141,18 @@ __gpu_read_first_lane_f64(uint64_t __lane_mask, double __x) {
                                         __builtin_bit_cast(uint64_t, __x)));
 }
 
+// Shuffles the lanes according to the given index.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
+                      uint32_t __width) {
+  uint32_t __hi = (uint32_t)(__x >> 32ull);
+  uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
+  uint32_t __mask = (uint32_t)__lane_mask;
+  return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width)
+          << 32ull) |
+         ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width));
+}
+
 // Shuffles the the lanes according to the given index.
 _DEFAULT_FN_ATTRS static __inline__ float
 __gpu_shuffle_idx_f32(uint64_t __lane_mask, uint32_t __idx, float __x,

diff  --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h
index d00a5f6de3950..10ad7a682d4cd 100644
--- a/clang/lib/Headers/nvptxintrin.h
+++ b/clang/lib/Headers/nvptxintrin.h
@@ -13,15 +13,12 @@
 #error "This file is intended for NVPTX targets or offloading to NVPTX"
 #endif
 
-#ifndef __CUDA_ARCH__
-#define __CUDA_ARCH__ 0
+#ifndef __GPUINTRIN_H
+#error "Never use <nvptxintrin.h> directly; include <gpuintrin.h> instead"
 #endif
 
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
+#ifndef __CUDA_ARCH__
+#define __CUDA_ARCH__ 0
 #endif
 
 _Pragma("omp begin declare target device_type(nohost)");
@@ -153,18 +150,6 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x,
                                   ((__gpu_num_lanes() - __width) << 8u) | 0x1f);
 }
 
-// Shuffles the the lanes inside the warp according to the given index.
-_DEFAULT_FN_ATTRS static __inline__ uint64_t
-__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
-                      uint32_t __width) {
-  uint32_t __hi = (uint32_t)(__x >> 32ull);
-  uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
-  uint32_t __mask = (uint32_t)__lane_mask;
-  return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width)
-          << 32ull) |
-         ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width));
-}
-
 // Returns a bitmask marking all lanes that have the same value of __x.
 _DEFAULT_FN_ATTRS static __inline__ uint64_t
 __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
@@ -263,8 +248,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
 _Pragma("omp end declare variant");
 _Pragma("omp end declare target");
 
-#if !defined(__cplusplus)
-_Pragma("pop_macro(\"bool\")");
-#endif
-
 #endif // __NVPTXINTRIN_H