[libc-commits] [libc] [libc] Add generic cpp::byteswap to CPP/bit.h (PR #196274)

Thu May 7 03:33:55 PDT 2026

https://github.com/kaladron created https://github.com/llvm/llvm-project/pull/196274

Added a constexpr byteswap template that uses recursive half-swap decomposition. It accepts all integral types, matching C++23 std::byteswap semantics. Signed types delegate to the unsigned path via static_cast, which the compiler elides entirely.

A single recursive template handles all sizes from 8 to 128 bits, with no per-width specialisations or builtin fallbacks needed. It produces optimal bswap/rolw instructions on Clang at -O2. A static_assert rejects types larger than 128 bits.

Refactored endian_internal.h to call cpp::byteswap directly, replacing the explicit template specialisations and builtin dispatch.

Assisted-by: Automated tooling, human reviewed.

From e8bb8d3d782c5d4fb59f343f4eb35e4c8e2ff574 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Thu, 7 May 2026 11:25:40 +0100
Subject: [PATCH] [libc] Add generic cpp::byteswap to CPP/bit.h

Added a constexpr byteswap template using recursive half-swap
decomposition. Accepts all integral types, matching C++23
std::byteswap semantics. Signed types delegate to the unsigned
path via static_cast, which the compiler elides entirely.

A single recursive template handles all sizes from 8 to 128 bits
with no per-width specialisations or builtin fallbacks needed.
Produces optimal bswap/rolw instructions on Clang at -O2.
A static_assert rejects types larger than 128 bits.

Refactored endian_internal.h to call cpp::byteswap directly,
replacing the explicit template specialisations and builtin
dispatch.

Assisted-by: Automated tooling, human reviewed.
---
 libc/src/__support/CMakeLists.txt        |  1 +
 libc/src/__support/CPP/bit.h             | 32 +++++++++++++-
 libc/src/__support/endian_internal.h     | 45 +++-----------------
 libc/test/src/__support/CPP/bit_test.cpp | 54 ++++++++++++++++++++++++
 4 files changed, 93 insertions(+), 39 deletions(-)

diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 6eab12ec8b637..098fb6ef86936 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -103,6 +103,7 @@ add_header_library(
     macros/config.h
   DEPENDS
     libc.hdr.stdint_proxy
+    libc.src.__support.CPP.bit
 )
 
 add_header_library(
diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h
index 0ba8b9219a317..3dfa81017472d 100644
--- a/libc/src/__support/CPP/bit.h
+++ b/libc/src/__support/CPP/bit.h
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 // This is inspired by LLVM ADT/bit.h header.
-// Some functions are missing, we can add them as needed (popcount, byteswap).
+// Some functions are missing, we can add them as needed.
 
 #ifndef LLVM_LIBC_SRC___SUPPORT_CPP_BIT_H
 #define LLVM_LIBC_SRC___SUPPORT_CPP_BIT_H
@@ -330,6 +330,36 @@ ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll)
 #endif // __builtin_popcountg
 #undef ADD_SPECIALIZATION
 
+/// Returns `value` with its bytes in reverse order (cf. C++23 std::byteswap).
+///
+/// Enabled for any T with cpp::is_integral_v<T>; sizeof(T) must be <= 16.
+/// A non-unsigned T is cast to cpp::make_unsigned_t<T>, swapped, cast back.
+/// Unsigned values are split into halves that are swapped recursively and
+/// exchanged; a one-byte value is returned as is. Per the patch notes,
+/// Clang at -O2 lowers this to 'bswap'/'rolw' without compiler intrinsics.
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_integral_v<T>, T>
+byteswap(T value) {
+  static_assert(sizeof(T) <= 16, "byteswap: unsupported type size");
+  if constexpr (!cpp::is_unsigned_v<T>) { // Not unsigned: swap as U instead.
+    using U = cpp::make_unsigned_t<T>;
+    return static_cast<T>(byteswap(static_cast<U>(value)));
+  } else if constexpr (sizeof(T) == 1) {
+    return value; // A single byte has nothing to reverse.
+  } else { // Swap each half on its own, then exchange their positions.
+    constexpr unsigned half_bits = sizeof(T) * 8 / 2;
+    using Half = cpp::conditional_t<
+        sizeof(T) == 2, uint8_t,
+        cpp::conditional_t<
+            sizeof(T) == 4, uint16_t,
+            cpp::conditional_t<sizeof(T) == 8, uint32_t, uint64_t>>>;
+    Half lo = static_cast<Half>(value); // Low half (truncating cast).
+    Half hi = static_cast<Half>(value >> half_bits); // High half.
+    return static_cast<T>((static_cast<T>(byteswap(lo)) << half_bits) |
+                          static_cast<T>(byteswap(hi)));
+  }
+}
+
 } // namespace cpp
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/src/__support/endian_internal.h b/libc/src/__support/endian_internal.h
index 07cde7b905c4d..8d81329fd243d 100644
--- a/libc/src/__support/endian_internal.h
+++ b/libc/src/__support/endian_internal.h
@@ -10,6 +10,7 @@
 #define LLVM_LIBC_SRC___SUPPORT_ENDIAN_INTERNAL_H
 
 #include "hdr/stdint_proxy.h"
+#include "src/__support/CPP/bit.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 
@@ -29,38 +30,6 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace internal {
 
-template <typename T> LIBC_INLINE T byte_swap(T value);
-
-template <> LIBC_INLINE uint16_t byte_swap<uint16_t>(uint16_t value) {
-#if __has_builtin(__builtin_bswap16)
-  return __builtin_bswap16(value);
-#else
-  return (value << 8) | (value >> 8);
-#endif // __builtin_bswap16
-}
-
-template <> LIBC_INLINE uint32_t byte_swap<uint32_t>(uint32_t value) {
-#if __has_builtin(__builtin_bswap32)
-  return __builtin_bswap32(value);
-#else
-  return byte_swap<uint16_t>(static_cast<uint16_t>(value >> 16)) ||
-         (static_cast<uint32_t>(
-              byte_swap<uint16_t>(static_cast<uint16_t>(value)))
-          << 16);
-#endif // __builtin_bswap64
-}
-
-template <> LIBC_INLINE uint64_t byte_swap<uint64_t>(uint64_t value) {
-#if __has_builtin(__builtin_bswap64)
-  return __builtin_bswap64(value);
-#else
-  return byte_swap<uint32_t>(static_cast<uint32_t>(value >> 32)) ||
-         (static_cast<uint64_t>(
-              byte_swap<uint32_t>(static_cast<uint32_t>(value)))
-          << 32);
-#endif // __builtin_bswap64
-}
-
 // Converts uint8_t, uint16_t, uint32_t, uint64_t to its big or little endian
 // counterpart.
 // We use explicit template specialization:
@@ -91,7 +60,7 @@ template <>
 template <>
 LIBC_INLINE uint16_t
 Endian<__ORDER_LITTLE_ENDIAN__>::to_big_endian<uint16_t>(uint16_t v) {
-  return byte_swap<uint16_t>(v);
+  return cpp::byteswap(v);
 }
 template <>
 template <>
@@ -103,7 +72,7 @@ template <>
 template <>
 LIBC_INLINE uint32_t
 Endian<__ORDER_LITTLE_ENDIAN__>::to_big_endian<uint32_t>(uint32_t v) {
-  return byte_swap<uint32_t>(v);
+  return cpp::byteswap(v);
 }
 template <>
 template <>
@@ -115,7 +84,7 @@ template <>
 template <>
 LIBC_INLINE uint64_t
 Endian<__ORDER_LITTLE_ENDIAN__>::to_big_endian<uint64_t>(uint64_t v) {
-  return byte_swap<uint64_t>(v);
+  return cpp::byteswap(v);
 }
 template <>
 template <>
@@ -147,7 +116,7 @@ template <>
 template <>
 LIBC_INLINE uint16_t
 Endian<__ORDER_BIG_ENDIAN__>::to_little_endian<uint16_t>(uint16_t v) {
-  return byte_swap<uint16_t>(v);
+  return cpp::byteswap(v);
 }
 template <>
 template <>
@@ -159,7 +128,7 @@ template <>
 template <>
 LIBC_INLINE uint32_t
 Endian<__ORDER_BIG_ENDIAN__>::to_little_endian<uint32_t>(uint32_t v) {
-  return byte_swap<uint32_t>(v);
+  return cpp::byteswap(v);
 }
 template <>
 template <>
@@ -171,7 +140,7 @@ template <>
 template <>
 LIBC_INLINE uint64_t
 Endian<__ORDER_BIG_ENDIAN__>::to_little_endian<uint64_t>(uint64_t v) {
-  return byte_swap<uint64_t>(v);
+  return cpp::byteswap(v);
 }
 
 } // namespace internal
diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp
index 891e693e0c953..bf22abe5b7d4a 100644
--- a/libc/test/src/__support/CPP/bit_test.cpp
+++ b/libc/test/src/__support/CPP/bit_test.cpp
@@ -233,5 +233,59 @@ TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypes) {
         cpp::numeric_limits<T>::digits - i);
 }
 
+TEST(LlvmLibcBitTest, Byteswap) { // Fixed unsigned inputs, 8 to 64 bits wide.
+  // 8-bit: a single byte has nothing to swap, so the value is unchanged.
+  EXPECT_EQ(byteswap(uint8_t(0x00)), uint8_t(0x00));
+  EXPECT_EQ(byteswap(uint8_t(0xAB)), uint8_t(0xAB));
+  EXPECT_EQ(byteswap(uint8_t(0xFF)), uint8_t(0xFF));
+
+  // 16-bit: the two bytes trade places; 0x0000 and 0xFFFF map to themselves.
+  EXPECT_EQ(byteswap(uint16_t(0x0000)), uint16_t(0x0000));
+  EXPECT_EQ(byteswap(uint16_t(0x1234)), uint16_t(0x3412));
+  EXPECT_EQ(byteswap(uint16_t(0xAABB)), uint16_t(0xBBAA));
+  EXPECT_EQ(byteswap(uint16_t(0xFFFF)), uint16_t(0xFFFF));
+
+  // 32-bit: the four bytes come back in reverse order.
+  EXPECT_EQ(byteswap(uint32_t(0x00000000)), uint32_t(0x00000000));
+  EXPECT_EQ(byteswap(uint32_t(0x12345678)), uint32_t(0x78563412));
+  EXPECT_EQ(byteswap(uint32_t(0xDEADBEEF)), uint32_t(0xEFBEADDE));
+  EXPECT_EQ(byteswap(uint32_t(0xFFFFFFFF)), uint32_t(0xFFFFFFFF));
+
+  // 64-bit: the eight bytes come back in reverse order.
+  EXPECT_EQ(byteswap(uint64_t(0x0000000000000000)),
+            uint64_t(0x0000000000000000));
+  EXPECT_EQ(byteswap(uint64_t(0x0123456789ABCDEF)),
+            uint64_t(0xEFCDAB8967452301));
+  EXPECT_EQ(byteswap(uint64_t(0xFFFFFFFFFFFFFFFF)),
+            uint64_t(0xFFFFFFFFFFFFFFFF));
+}
+
+TEST(LlvmLibcBitTest, ByteswapSigned) { // Signed inputs keep their type.
+  // Signed 16-bit: input and expected result are both positive.
+  EXPECT_EQ(byteswap(int16_t(0x1234)), int16_t(0x3412));
+
+  // Signed 32-bit: input and expected result are both positive.
+  EXPECT_EQ(byteswap(int32_t(0x12345678)), int32_t(0x78563412));
+
+  // Signed 64-bit: the expected bit pattern has its top bit set.
+  EXPECT_EQ(byteswap(int64_t(0x0123456789ABCDEF)), int64_t(0xEFCDAB8967452301));
+}
+
+using ByteswapTypes = testing::TypeList< // Types run by ByteswapInvolution.
+#if defined(LIBC_TYPES_HAS_INT128)
+    __uint128_t,
+#endif // LIBC_TYPES_HAS_INT128
+    unsigned char, unsigned short, unsigned int, unsigned long,
+    unsigned long long, signed char, short, int, long, long long>;
+
+TYPED_TEST(LlvmLibcBitTest, ByteswapInvolution, ByteswapTypes) {
+  // Swapping twice must give back x: tried with 0, 1, ~0 and a hex pattern.
+  EXPECT_EQ(byteswap(byteswap(T(0))), T(0));
+  EXPECT_EQ(byteswap(byteswap(T(1))), T(1));
+  EXPECT_EQ(byteswap(byteswap(T(~0))), T(~0));
+  EXPECT_EQ(byteswap(byteswap(T(0x0123456789ABCDEF & T(~0)))),
+            T(0x0123456789ABCDEF & T(~0)));
+}
+
 } // namespace cpp
 } // namespace LIBC_NAMESPACE_DECL