[llvm-branch-commits] [llvm] [Support] Add SipHash-based 16/64-bit ptrauth stable hash. (PR #93902)

Ahmed Bougacha via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jun 4 15:25:46 PDT 2024


================
@@ -0,0 +1,174 @@
+//===--- StableHash.cpp - An ABI-stable string hash -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements an ABI-stable string hash based on SipHash, used to
+//  compute ptrauth discriminators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SipHash.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+#include <cstring>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "llvm-siphash"
+
+//  Lightly adapted from the SipHash reference C implementation by
+//  Jean-Philippe Aumasson and Daniel J. Bernstein.
+// Rotate the 64-bit value x left by b bits; b must be in 1..63 (0 would shift by 64).
+#define SIPHASH_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+// Assemble a uint64_t from the 8 bytes p[0..7], least-significant byte first.
+#define SIPHASH_U8TO64_LE(p)                                                   \
+  (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                          \
+   ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                   \
+   ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                   \
+   ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+// One mixing round; reads and updates variables v0..v3 from the expansion site.
+#define SIPHASH_SIPROUND                                                       \
+  do {                                                                         \
+    v0 += v1;                                                                  \
+    v1 = SIPHASH_ROTL(v1, 13);                                                 \
+    v1 ^= v0;                                                                  \
+    v0 = SIPHASH_ROTL(v0, 32);                                                 \
+    v2 += v3;                                                                  \
+    v3 = SIPHASH_ROTL(v3, 16);                                                 \
+    v3 ^= v2;                                                                  \
+    v0 += v3;                                                                  \
+    v3 = SIPHASH_ROTL(v3, 21);                                                 \
+    v3 ^= v0;                                                                  \
+    v2 += v1;                                                                  \
+    v1 = SIPHASH_ROTL(v1, 17);                                                 \
+    v1 ^= v2;                                                                  \
+    v2 = SIPHASH_ROTL(v2, 32);                                                 \
+  } while (0)
+/// SipHash core: hashes inlen bytes at in with 16-byte key k (cROUNDS per word, dROUNDS to finalize).
+template <int cROUNDS, int dROUNDS, class ResultTy>
+static inline ResultTy siphash(const uint8_t *in, uint64_t inlen,
+                               const uint8_t (&k)[16]) {
+  static_assert(sizeof(ResultTy) == 8 || sizeof(ResultTy) == 16,
+                "result type should be uint64_t or uint128_t");
+  uint64_t v0 = 0x736f6d6570736575ULL; // ASCII "somepseu"
+  uint64_t v1 = 0x646f72616e646f6dULL; // ASCII "dorandom"
+  uint64_t v2 = 0x6c7967656e657261ULL; // ASCII "lygenera"
+  uint64_t v3 = 0x7465646279746573ULL; // ASCII "tedbytes"
+  uint64_t b; // final word: length byte on top, trailing input bytes below
+  uint64_t k0 = SIPHASH_U8TO64_LE(k); // key bytes 0-7, little-endian
+  uint64_t k1 = SIPHASH_U8TO64_LE(k + 8); // key bytes 8-15, little-endian
+  uint64_t m; // current 8-byte message word
+  int i;
+  const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); // end of full words
+  const int left = inlen & 7; // number of trailing bytes (0-7)
+  b = ((uint64_t)inlen) << 56; // inlen mod 256 placed in the top byte
+  v3 ^= k1;
+  v2 ^= k0;
+  v1 ^= k1;
+  v0 ^= k0;
+  // 16-byte results additionally tweak v1 during initialization.
+  if (sizeof(ResultTy) == 16) {
+    v1 ^= 0xee;
+  }
+  // Absorb each full 8-byte little-endian word: v3 ^= m, cROUNDS rounds, v0 ^= m.
+  for (; in != end; in += 8) {
+    m = SIPHASH_U8TO64_LE(in);
+    v3 ^= m;
+
+    for (i = 0; i < cROUNDS; ++i)
+      SIPHASH_SIPROUND;
+
+    v0 ^= m;
+  }
+  // Fold the 0-7 trailing bytes into b, little-endian, below the length byte.
+  switch (left) {
+  case 7:
+    b |= ((uint64_t)in[6]) << 48;
+    LLVM_FALLTHROUGH;
+  case 6:
+    b |= ((uint64_t)in[5]) << 40;
+    LLVM_FALLTHROUGH;
+  case 5:
+    b |= ((uint64_t)in[4]) << 32;
+    LLVM_FALLTHROUGH;
+  case 4:
+    b |= ((uint64_t)in[3]) << 24;
+    LLVM_FALLTHROUGH;
+  case 3:
+    b |= ((uint64_t)in[2]) << 16;
+    LLVM_FALLTHROUGH;
+  case 2:
+    b |= ((uint64_t)in[1]) << 8;
+    LLVM_FALLTHROUGH;
+  case 1:
+    b |= ((uint64_t)in[0]);
+    break;
+  case 0:
+    break;
+  }
+  // Absorb the final word b the same way as a full message word.
+  v3 ^= b;
+
+  for (i = 0; i < cROUNDS; ++i)
+    SIPHASH_SIPROUND;
+
+  v0 ^= b;
+  // Finalization: the v2 tweak differs between 8-byte and 16-byte results.
+  if (sizeof(ResultTy) == 8) {
+    v2 ^= 0xff;
+  } else {
+    v2 ^= 0xee;
+  }
+
+  for (i = 0; i < dROUNDS; ++i)
+    SIPHASH_SIPROUND;
+
+  b = v0 ^ v1 ^ v2 ^ v3;
+
+  // This mess with the result type would be easier with 'if constexpr'.
+  // 8-byte results stop here; the code below still has to compile for them.
+  uint64_t firstHalf = b;
+  if (sizeof(ResultTy) == 8)
+    return firstHalf;
+  // 16-byte results: tweak v1 and run dROUNDS more rounds for the second half.
+  v1 ^= 0xdd;
+
+  for (i = 0; i < dROUNDS; ++i)
+    SIPHASH_SIPROUND;
+
+  b = v0 ^ v1 ^ v2 ^ v3;
+  uint64_t secondHalf = b;
+  // Low 64 bits = firstHalf, high 64 = secondHalf (shift is 0 when ResultTy is 8 bytes).
+  return firstHalf | (ResultTy(secondHalf) << (sizeof(ResultTy) == 8 ? 0 : 64));
+}
+
+//===--- LLVM-specific wrappers around siphash.
+
+/// Compute an ABI-stable 64-bit hash of \p Str: siphash<2, 4> over its bytes with a fixed key.
+uint64_t llvm::getPointerAuthStableSipHash64(StringRef Str) {
+  static const uint8_t K[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, 0x4a, 0x79,
+                                0x6f, 0xec, 0x8b, 0x1b, 0x42, 0x87, 0x81, 0xd4};
+  // siphash() reads its input as uint8_t, so view the string's chars as bytes.
+  // The aliasing is fine here because of omnipotent char.
+  auto *Data = reinterpret_cast<const uint8_t *>(Str.data());
+  return siphash<2, 4, uint64_t>(Data, Str.size(), K);
+}
+
+/// Compute an ABI-stable 16-bit hash of the given string.
+uint64_t llvm::getPointerAuthStableSipHash16(StringRef Str) {
+  uint64_t RawHash = getPointerAuthStableSipHash64(Str);
+
+  // Produce a non-zero 16-bit discriminator.
+  uint64_t Discriminator = (RawHash % 0xFFFF) + 1;
----------------
ahmedbougacha wrote:

> I'm sure that such a scheme has already been used downstream for a long time

I'll just mention that this isn't merely a downstream matter: it's the platform ABI for arm64e.

> I suppose that it might be OK, it's just not very consistent with 64-bit hash computation since we do not try to avoid zero value there. I get the point that the chance of having zero 64-bit hash value is very low compared to 16-bit though.

Right, the 16-bit and 64-bit hashes aren't used in equivalent ways. The 16-bit result is used in discriminator contexts, where the overwhelming majority of discriminators are 0 (and this is even enshrined in the ISA with the LDRAA/LDRAB instructions). This current patch series doesn't use the 64-bit variant, so I removed it altogether, but you could imagine using it in more interesting ways, e.g., as a way to seed a (not-really-but-almost) 64-bit PACGA MAC chain. In such cases a collision with the default 0 discriminator value isn't a concern.

https://github.com/llvm/llvm-project/pull/93902


More information about the llvm-branch-commits mailing list