[llvm-branch-commits] [llvm] [Support] Integrate SipHash.cpp into libSupport. (PR #94394)

Tue Jun 11 16:14:00 PDT 2024

================
@@ -1,185 +1,149 @@
-/*
-   SipHash reference C implementation
-
-   Copyright (c) 2012-2022 Jean-Philippe Aumasson
-   <jeanphilippe.aumasson at gmail.com>
-   Copyright (c) 2012-2014 Daniel J. Bernstein <djb at cr.yp.to>
-
-   To the extent possible under law, the author(s) have dedicated all copyright
-   and related and neighboring rights to this software to the public domain
-   worldwide. This software is distributed without any warranty.
-
-   You should have received a copy of the CC0 Public Domain Dedication along
-   with
-   this software. If not, see
-   <http://creativecommons.org/publicdomain/zero/1.0/>.
- */
-
-#include "siphash.h"
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-
-/* default: SipHash-2-4 */
-#ifndef cROUNDS
-#define cROUNDS 2
-#endif
-#ifndef dROUNDS
-#define dROUNDS 4
-#endif
+//===--- SipHash.cpp - An ABI-stable string hash --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
 
-#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+#include "llvm/Support/Compiler.h"
+#include <cstdint>
 
-#define U32TO8_LE(p, v)                                                        \
-    (p)[0] = (uint8_t)((v));                                                   \
-    (p)[1] = (uint8_t)((v) >> 8);                                              \
-    (p)[2] = (uint8_t)((v) >> 16);                                             \
-    (p)[3] = (uint8_t)((v) >> 24);
+// Lightly adapted from the SipHash reference C implementation:
+//   https://github.com/veorq/SipHash
+// by Jean-Philippe Aumasson and Daniel J. Bernstein
 
-#define U64TO8_LE(p, v)                                                        \
-    U32TO8_LE((p), (uint32_t)((v)));                                           \
-    U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
 
 #define U8TO64_LE(p)                                                           \
-    (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                        \
-     ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                 \
-     ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                 \
-     ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+  (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                          \
+   ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                   \
+   ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                   \
+   ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
 
 #define SIPROUND                                                               \
-    do {                                                                       \
-        v0 += v1;                                                              \
-        v1 = ROTL(v1, 13);                                                     \
-        v1 ^= v0;                                                              \
-        v0 = ROTL(v0, 32);                                                     \
-        v2 += v3;                                                              \
-        v3 = ROTL(v3, 16);                                                     \
-        v3 ^= v2;                                                              \
-        v0 += v3;                                                              \
-        v3 = ROTL(v3, 21);                                                     \
-        v3 ^= v0;                                                              \
-        v2 += v1;                                                              \
-        v1 = ROTL(v1, 17);                                                     \
-        v1 ^= v2;                                                              \
-        v2 = ROTL(v2, 32);                                                     \
-    } while (0)
-
-#ifdef DEBUG_SIPHASH
-#include <stdio.h>
-
-#define TRACE                                                                  \
-    do {                                                                       \
-        printf("(%3zu) v0 %016" PRIx64 "\n", inlen, v0);                       \
-        printf("(%3zu) v1 %016" PRIx64 "\n", inlen, v1);                       \
-        printf("(%3zu) v2 %016" PRIx64 "\n", inlen, v2);                       \
-        printf("(%3zu) v3 %016" PRIx64 "\n", inlen, v3);                       \
-    } while (0)
-#else
-#define TRACE
-#endif
-
-/*
-    Computes a SipHash value
-    *in: pointer to input data (read-only)
-    inlen: input data length in bytes (any size_t value)
-    *k: pointer to the key data (read-only), must be 16 bytes 
-    *out: pointer to output data (write-only), outlen bytes must be allocated
-    outlen: length of the output in bytes, must be 8 or 16
-*/
-int siphash(const void *in, const size_t inlen, const void *k, uint8_t *out,
-            const size_t outlen) {
-
-    const unsigned char *ni = (const unsigned char *)in;
-    const unsigned char *kk = (const unsigned char *)k;
-
-    assert((outlen == 8) || (outlen == 16));
-    uint64_t v0 = UINT64_C(0x736f6d6570736575);
-    uint64_t v1 = UINT64_C(0x646f72616e646f6d);
-    uint64_t v2 = UINT64_C(0x6c7967656e657261);
-    uint64_t v3 = UINT64_C(0x7465646279746573);
-    uint64_t k0 = U8TO64_LE(kk);
-    uint64_t k1 = U8TO64_LE(kk + 8);
-    uint64_t m;
-    int i;
-    const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
-    const int left = inlen & 7;
-    uint64_t b = ((uint64_t)inlen) << 56;
-    v3 ^= k1;
-    v2 ^= k0;
-    v1 ^= k1;
-    v0 ^= k0;
-
-    if (outlen == 16)
-        v1 ^= 0xee;
-
-    for (; ni != end; ni += 8) {
-        m = U8TO64_LE(ni);
-        v3 ^= m;
-
-        TRACE;
-        for (i = 0; i < cROUNDS; ++i)
-            SIPROUND;
-
-        v0 ^= m;
-    }
-
-    switch (left) {
-    case 7:
-        b |= ((uint64_t)ni[6]) << 48;
-        /* FALLTHRU */
-    case 6:
-        b |= ((uint64_t)ni[5]) << 40;
-        /* FALLTHRU */
-    case 5:
-        b |= ((uint64_t)ni[4]) << 32;
-        /* FALLTHRU */
-    case 4:
-        b |= ((uint64_t)ni[3]) << 24;
-        /* FALLTHRU */
-    case 3:
-        b |= ((uint64_t)ni[2]) << 16;
-        /* FALLTHRU */
-    case 2:
-        b |= ((uint64_t)ni[1]) << 8;
-        /* FALLTHRU */
-    case 1:
-        b |= ((uint64_t)ni[0]);
-        break;
-    case 0:
-        break;
-    }
-
-    v3 ^= b;
-
-    TRACE;
-    for (i = 0; i < cROUNDS; ++i)
-        SIPROUND;
-
-    v0 ^= b;
-
-    if (outlen == 16)
-        v2 ^= 0xee;
-    else
-        v2 ^= 0xff;
-
-    TRACE;
-    for (i = 0; i < dROUNDS; ++i)
-        SIPROUND;
-
-    b = v0 ^ v1 ^ v2 ^ v3;
-    U64TO8_LE(out, b);
+  do {                                                                         \
+    v0 += v1;                                                                  \
+    v1 = ROTL(v1, 13);                                                         \
+    v1 ^= v0;                                                                  \
+    v0 = ROTL(v0, 32);                                                         \
+    v2 += v3;                                                                  \
+    v3 = ROTL(v3, 16);                                                         \
+    v3 ^= v2;                                                                  \
+    v0 += v3;                                                                  \
+    v3 = ROTL(v3, 21);                                                         \
+    v3 ^= v0;                                                                  \
+    v2 += v1;                                                                  \
+    v1 = ROTL(v1, 17);                                                         \
+    v1 ^= v2;                                                                  \
+    v2 = ROTL(v2, 32);                                                         \
+  } while (0)
+
+namespace {
+
+/// Computes a SipHash value
+///
+/// \param in: pointer to input data (read-only)
+/// \param inlen: input data length in bytes (any size_t value)
+/// \param k: reference to the key data 16-byte array (read-only)
+/// \returns output data, must be 8 or 16 bytes
+///
+template <int cROUNDS, int dROUNDS, class ResultTy>
+ResultTy siphash(const unsigned char *in, uint64_t inlen,
+                 const unsigned char (&k)[16]) {
+
+  const unsigned char *ni = (const unsigned char *)in;
+  const unsigned char *kk = (const unsigned char *)k;
+
+  constexpr size_t outlen = sizeof(ResultTy);
+  static_assert(outlen == 8 || outlen == 16,
+                "result type should be uint64_t or uint128_t");
+
+  uint64_t v0 = UINT64_C(0x736f6d6570736575);
+  uint64_t v1 = UINT64_C(0x646f72616e646f6d);
+  uint64_t v2 = UINT64_C(0x6c7967656e657261);
+  uint64_t v3 = UINT64_C(0x7465646279746573);
+  uint64_t k0 = U8TO64_LE(kk);
+  uint64_t k1 = U8TO64_LE(kk + 8);
+  uint64_t m;
+  int i;
+  const unsigned char *end = ni + inlen - (inlen % sizeof(uint64_t));
+  const int left = inlen & 7;
+  uint64_t b = ((uint64_t)inlen) << 56;
+  v3 ^= k1;
+  v2 ^= k0;
+  v1 ^= k1;
+  v0 ^= k0;
+
+  if (outlen == 16)
+    v1 ^= 0xee;
+
+  for (; ni != end; ni += 8) {
+    m = U8TO64_LE(ni);
+    v3 ^= m;
 
-    if (outlen == 8)
-        return 0;
-
-    v1 ^= 0xdd;
-
-    TRACE;
-    for (i = 0; i < dROUNDS; ++i)
-        SIPROUND;
-
-    b = v0 ^ v1 ^ v2 ^ v3;
-    U64TO8_LE(out + 8, b);
-
-    return 0;
+    for (i = 0; i < cROUNDS; ++i)
+      SIPROUND;
+
+    v0 ^= m;
+  }
+
+  switch (left) {
+  case 7:
+    b |= ((uint64_t)ni[6]) << 48;
+    LLVM_FALLTHROUGH;
+  case 6:
+    b |= ((uint64_t)ni[5]) << 40;
+    LLVM_FALLTHROUGH;
+  case 5:
+    b |= ((uint64_t)ni[4]) << 32;
+    LLVM_FALLTHROUGH;
+  case 4:
+    b |= ((uint64_t)ni[3]) << 24;
+    LLVM_FALLTHROUGH;
+  case 3:
+    b |= ((uint64_t)ni[2]) << 16;
+    LLVM_FALLTHROUGH;
+  case 2:
+    b |= ((uint64_t)ni[1]) << 8;
+    LLVM_FALLTHROUGH;
+  case 1:
+    b |= ((uint64_t)ni[0]);
+    break;
+  case 0:
+    break;
+  }
+
+  v3 ^= b;
+
+  for (i = 0; i < cROUNDS; ++i)
+    SIPROUND;
+
+  v0 ^= b;
+
+  if (outlen == 16)
+    v2 ^= 0xee;
+  else
+    v2 ^= 0xff;
+
+  for (i = 0; i < dROUNDS; ++i)
+    SIPROUND;
+
+  b = v0 ^ v1 ^ v2 ^ v3;
+  uint64_t firstHalf = b;
+
+  if (outlen == 8)
+    return firstHalf;
+
+  v1 ^= 0xdd;
+
+  for (i = 0; i < dROUNDS; ++i)
+    SIPROUND;
+
+  b = v0 ^ v1 ^ v2 ^ v3;
+  uint64_t secondHalf = b;
+
+  return firstHalf | (ResultTy(secondHalf) << 64);
----------------
ahmedbougacha wrote:

In the 128-bit result case we'd be exposed to the big-endian host ABI's treatment of uint128_t results, and from what I can find that seems to be quite messy!  [644b473](https://github.com/llvm/llvm-project/pull/94394/commits/644b473fa7d0382b035d4dfd2ea4ed88fcd97233) describes it a little bit more, and brings back the original's returning via an i8 array.  The caller can figure out what it wants to do with the result bytes.

For ptrauth in #93902 I made it `read64le` the result again (and we do define the truncation as based on the little-endian interpretation of the raw hash bytes already.)

https://github.com/llvm/llvm-project/pull/94394