[llvm-branch-commits] Extract SipHash implementation into a header. (PR #134197)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Apr 2 21:34:46 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-support

Author: Peter Collingbourne (pcc)

<details>
<summary>Changes</summary>

This is so that we'll be able to use it in compiler-rt as well.
Dependencies on LLVM Support were removed from the header by restoring
code from the original SipHash implementation.


---
Full diff: https://github.com/llvm/llvm-project/pull/134197.diff


4 Files Affected:

- (modified) llvm/lib/Support/CMakeLists.txt (+5) 
- (modified) llvm/lib/Support/SipHash.cpp (+1-130) 
- (modified) llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn (+1) 
- (added) third-party/siphash/include/siphash/SipHash.h (+161) 


``````````diff
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 2754c97fce6c1..1b18eb7c6346b 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -368,3 +368,8 @@ if(LLVM_WITH_Z3)
     ${Z3_INCLUDE_DIR}
     )
 endif()
+
+target_include_directories(LLVMSupport SYSTEM
+  PRIVATE
+  ${LLVM_THIRD_PARTY_DIR}/siphash/include
+  )
diff --git a/llvm/lib/Support/SipHash.cpp b/llvm/lib/Support/SipHash.cpp
index 68545913a4f59..682e9231c776f 100644
--- a/llvm/lib/Support/SipHash.cpp
+++ b/llvm/lib/Support/SipHash.cpp
@@ -15,9 +15,9 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
+#include "siphash/SipHash.h"
 #include <cstdint>
 
 using namespace llvm;
@@ -25,135 +25,6 @@ using namespace support;
 
 #define DEBUG_TYPE "llvm-siphash"
 
-// Lightly adapted from the SipHash reference C implementation:
-//   https://github.com/veorq/SipHash
-// by Jean-Philippe Aumasson and Daniel J. Bernstein
-
-#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
-
-#define SIPROUND                                                               \
-  do {                                                                         \
-    v0 += v1;                                                                  \
-    v1 = ROTL(v1, 13);                                                         \
-    v1 ^= v0;                                                                  \
-    v0 = ROTL(v0, 32);                                                         \
-    v2 += v3;                                                                  \
-    v3 = ROTL(v3, 16);                                                         \
-    v3 ^= v2;                                                                  \
-    v0 += v3;                                                                  \
-    v3 = ROTL(v3, 21);                                                         \
-    v3 ^= v0;                                                                  \
-    v2 += v1;                                                                  \
-    v1 = ROTL(v1, 17);                                                         \
-    v1 ^= v2;                                                                  \
-    v2 = ROTL(v2, 32);                                                         \
-  } while (0)
-
-namespace {
-
-/// Computes a SipHash value
-///
-/// \param in: pointer to input data (read-only)
-/// \param inlen: input data length in bytes (any size_t value)
-/// \param k: reference to the key data 16-byte array (read-only)
-/// \returns output data, must be 8 or 16 bytes
-///
-template <int cROUNDS, int dROUNDS, size_t outlen>
-void siphash(const unsigned char *in, uint64_t inlen,
-             const unsigned char (&k)[16], unsigned char (&out)[outlen]) {
-
-  const unsigned char *ni = (const unsigned char *)in;
-  const unsigned char *kk = (const unsigned char *)k;
-
-  static_assert(outlen == 8 || outlen == 16, "result should be 8 or 16 bytes");
-
-  uint64_t v0 = UINT64_C(0x736f6d6570736575);
-  uint64_t v1 = UINT64_C(0x646f72616e646f6d);
-  uint64_t v2 = UINT64_C(0x6c7967656e657261);
-  uint64_t v3 = UINT64_C(0x7465646279746573);
-  uint64_t k0 = endian::read64le(kk);
-  uint64_t k1 = endian::read64le(kk + 8);
-  uint64_t m;
-  int i;
-  const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
-  const int left = inlen & 7;
-  uint64_t b = ((uint64_t)inlen) << 56;
-  v3 ^= k1;
-  v2 ^= k0;
-  v1 ^= k1;
-  v0 ^= k0;
-
-  if (outlen == 16)
-    v1 ^= 0xee;
-
-  for (; ni != end; ni += 8) {
-    m = endian::read64le(ni);
-    v3 ^= m;
-
-    for (i = 0; i < cROUNDS; ++i)
-      SIPROUND;
-
-    v0 ^= m;
-  }
-
-  switch (left) {
-  case 7:
-    b |= ((uint64_t)ni[6]) << 48;
-    LLVM_FALLTHROUGH;
-  case 6:
-    b |= ((uint64_t)ni[5]) << 40;
-    LLVM_FALLTHROUGH;
-  case 5:
-    b |= ((uint64_t)ni[4]) << 32;
-    LLVM_FALLTHROUGH;
-  case 4:
-    b |= ((uint64_t)ni[3]) << 24;
-    LLVM_FALLTHROUGH;
-  case 3:
-    b |= ((uint64_t)ni[2]) << 16;
-    LLVM_FALLTHROUGH;
-  case 2:
-    b |= ((uint64_t)ni[1]) << 8;
-    LLVM_FALLTHROUGH;
-  case 1:
-    b |= ((uint64_t)ni[0]);
-    break;
-  case 0:
-    break;
-  }
-
-  v3 ^= b;
-
-  for (i = 0; i < cROUNDS; ++i)
-    SIPROUND;
-
-  v0 ^= b;
-
-  if (outlen == 16)
-    v2 ^= 0xee;
-  else
-    v2 ^= 0xff;
-
-  for (i = 0; i < dROUNDS; ++i)
-    SIPROUND;
-
-  b = v0 ^ v1 ^ v2 ^ v3;
-  endian::write64le(out, b);
-
-  if (outlen == 8)
-    return;
-
-  v1 ^= 0xdd;
-
-  for (i = 0; i < dROUNDS; ++i)
-    SIPROUND;
-
-  b = v0 ^ v1 ^ v2 ^ v3;
-  endian::write64le(out + 8, b);
-}
-
-} // end anonymous namespace
-
 void llvm::getSipHash_2_4_64(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
                              uint8_t (&Out)[8]) {
   siphash<2, 4>(In.data(), In.size(), K, Out);
diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
index fe7ff6ef68f99..5461ed5246b85 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
@@ -31,6 +31,7 @@ static_library("Support") {
   include_dirs = [
     "Unix",
     "Windows",
+    "//third-party/siphash/include",
   ]
   sources = [
     "AArch64AttributeParser.cpp",
diff --git a/third-party/siphash/include/siphash/SipHash.h b/third-party/siphash/include/siphash/SipHash.h
new file mode 100644
index 0000000000000..9653e9428b123
--- /dev/null
+++ b/third-party/siphash/include/siphash/SipHash.h
@@ -0,0 +1,161 @@
+//===--- SipHash.h - An implementation of SipHash -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This is a header-only implementation of SipHash. It lacks library
+//  dependencies so it can be used from LLVM and compiler-rt.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Lightly adapted from the SipHash reference C implementation:
+//   https://github.com/veorq/SipHash
+// by Jean-Philippe Aumasson and Daniel J. Bernstein
+
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) // rotate left
+// Stores 32-bit v at p[0..3], little-endian. Expands to four statements.
+#define U32TO8_LE(p, v)                                                        \
+    (p)[0] = (uint8_t)((v));                                                   \
+    (p)[1] = (uint8_t)((v) >> 8);                                              \
+    (p)[2] = (uint8_t)((v) >> 16);                                             \
+    (p)[3] = (uint8_t)((v) >> 24);
+// Stores 64-bit v at p[0..7], little-endian. Expands to several statements.
+#define U64TO8_LE(p, v)                                                        \
+    U32TO8_LE((p), (uint32_t)((v)));                                           \
+    U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
+// Reads p[0..7] as a little-endian 64-bit value.
+#define U8TO64_LE(p)                                                           \
+    (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                        \
+     ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                 \
+     ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                 \
+     ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+// One SipRound over the variables v0, v1, v2 and v3 of the enclosing scope.
+#define SIPROUND                                                               \
+  do {                                                                         \
+    v0 += v1;                                                                  \
+    v1 = ROTL(v1, 13);                                                         \
+    v1 ^= v0;                                                                  \
+    v0 = ROTL(v0, 32);                                                         \
+    v2 += v3;                                                                  \
+    v3 = ROTL(v3, 16);                                                         \
+    v3 ^= v2;                                                                  \
+    v0 += v3;                                                                  \
+    v3 = ROTL(v3, 21);                                                         \
+    v3 ^= v0;                                                                  \
+    v2 += v1;                                                                  \
+    v1 = ROTL(v1, 17);                                                         \
+    v1 ^= v2;                                                                  \
+    v2 = ROTL(v2, 32);                                                         \
+  } while (0)
+
+namespace {
+
+/// Computes a SipHash value, running cROUNDS SIPROUNDs per 64-bit input word
+/// and dROUNDS SIPROUNDs before each 64-bit word of output.
+/// \param in: pointer to input data (read-only)
+/// \param inlen: input data length in bytes
+/// \param k: reference to the key data 16-byte array (read-only)
+/// \param out: reference to the output array, which must be 8 or 16 bytes
+///
+template <int cROUNDS, int dROUNDS, size_t outlen>
+void siphash(const unsigned char *in, uint64_t inlen,
+             const unsigned char (&k)[16], unsigned char (&out)[outlen]) {
+
+  const unsigned char *ni = (const unsigned char *)in;
+  const unsigned char *kk = (const unsigned char *)k;
+
+  static_assert(outlen == 8 || outlen == 16, "result should be 8 or 16 bytes");
+
+  uint64_t v0 = UINT64_C(0x736f6d6570736575); // ASCII "somepseu"
+  uint64_t v1 = UINT64_C(0x646f72616e646f6d); // ASCII "dorandom"
+  uint64_t v2 = UINT64_C(0x6c7967656e657261); // ASCII "lygenera"
+  uint64_t v3 = UINT64_C(0x7465646279746573); // ASCII "tedbytes"
+  uint64_t k0 = U8TO64_LE(kk);     // key bytes 0..7
+  uint64_t k1 = U8TO64_LE(kk + 8); // key bytes 8..15
+  uint64_t m;
+  int i;
+  const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
+  const int left = inlen & 7; // tail bytes after the last whole 8-byte word
+  uint64_t b = ((uint64_t)inlen) << 56; // low 8 bits of inlen in the top byte
+  v3 ^= k1;
+  v2 ^= k0;
+  v1 ^= k1;
+  v0 ^= k0;
+
+  if (outlen == 16) // 16-byte output uses a different initial state
+    v1 ^= 0xee;
+
+  for (; ni != end; ni += 8) { // absorb every whole 8-byte word of the input
+    m = U8TO64_LE(ni);
+    v3 ^= m;
+
+    for (i = 0; i < cROUNDS; ++i)
+      SIPROUND;
+
+    v0 ^= m;
+  }
+
+  switch (left) { // gather the remaining tail bytes into the low bytes of b
+  case 7:
+    b |= ((uint64_t)ni[6]) << 48;
+    /* FALLTHRU */
+  case 6:
+    b |= ((uint64_t)ni[5]) << 40;
+    /* FALLTHRU */
+  case 5:
+    b |= ((uint64_t)ni[4]) << 32;
+    /* FALLTHRU */
+  case 4:
+    b |= ((uint64_t)ni[3]) << 24;
+    /* FALLTHRU */
+  case 3:
+    b |= ((uint64_t)ni[2]) << 16;
+    /* FALLTHRU */
+  case 2:
+    b |= ((uint64_t)ni[1]) << 8;
+    /* FALLTHRU */
+  case 1:
+    b |= ((uint64_t)ni[0]);
+    /* last tail byte: nothing left to fall through to */
+    break;
+  case 0:
+    break;
+  }
+
+  v3 ^= b; // final word: tail bytes plus the length byte
+
+  for (i = 0; i < cROUNDS; ++i)
+    SIPROUND;
+
+  v0 ^= b;
+
+  if (outlen == 16)
+    v2 ^= 0xee;
+  else
+    v2 ^= 0xff;
+
+  for (i = 0; i < dROUNDS; ++i)
+    SIPROUND;
+
+  b = v0 ^ v1 ^ v2 ^ v3;
+  U64TO8_LE(out, b); // output bytes 0..7
+
+  if (outlen == 8) // 8-byte output is complete here
+    return;
+
+  v1 ^= 0xdd;
+
+  for (i = 0; i < dROUNDS; ++i)
+    SIPROUND;
+
+  b = v0 ^ v1 ^ v2 ^ v3;
+  U64TO8_LE(out + 8, b); // output bytes 8..15
+}
+
+} // end anonymous namespace

``````````

</details>


https://github.com/llvm/llvm-project/pull/134197


More information about the llvm-branch-commits mailing list