[libc-commits] [clang] [libc] [Clang] Add more scan / reduce operations to 'gpuintrin.h' (PR #185525)

Joseph Huber via libc-commits libc-commits at lists.llvm.org
Tue Mar 10 05:50:25 PDT 2026


================
@@ -245,11 +245,44 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x,
         __lane_mask,                                                           \
         __gpu_suffix_scan_##__prefix##_##__suffix(__lane_mask, __x));          \
   }
-__DO_LANE_OP(uint32_t, +, 0, sum, u32);
-__DO_LANE_OP(uint64_t, +, 0, sum, u64);
-__DO_LANE_OP(float, +, 0, sum, f32);
-__DO_LANE_OP(double, +, 0, sum, f64);
-#undef __DO_LANE_OP
+
+#define __GPU_OP(__x, __y) ((__x) + (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, sum, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, sum, u64);
+__DO_LANE_OPS(float, __GPU_OP, 0, sum, f32);
+__DO_LANE_OPS(double, __GPU_OP, 0, sum, f64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) & (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, UINT32_MAX, and, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, UINT64_MAX, and, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) | (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, or, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, or, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) ^ (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, xor, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, xor, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) < (__y) ? (__x) : (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, UINT32_MAX, min, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, UINT64_MAX, min, u64);
+__DO_LANE_OPS(float, __GPU_OP, __builtin_inff(), min, f32);
+__DO_LANE_OPS(double, __GPU_OP, __builtin_inf(), min, f64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) > (__y) ? (__x) : (__y))
----------------
jhuber6 wrote:

For some reason I forgot that `__builtin_fmin` exists; fixed.

https://github.com/llvm/llvm-project/pull/185525


More information about the libc-commits mailing list