[clang] [llvm] [Support] Add SipHash-based 16/64-bit ptrauth stable hash. (PR #93902)
Daniil Kovalev via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 3 13:27:00 PDT 2024
================
@@ -0,0 +1,174 @@
+//===--- StableHash.cpp - An ABI-stable string hash -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an ABI-stable string hash based on SipHash, used to
+// compute ptrauth discriminators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SipHash.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+#include <cstring>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "llvm-siphash"
+
+// Lightly adapted from the SipHash reference C implementation by
+// Jean-Philippe Aumasson and Daniel J. Bernstein.
+
+#define SIPHASH_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) // Rotate the 64-bit value x left by b bits; b must be in 1..63, since b == 0 would shift right by 64.
+
+#define SIPHASH_U8TO64_LE(p) /* Assemble bytes p[0]..p[7] into a uint64_t, with p[0] as the least-significant byte. */ \
+ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
+ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
+ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
+ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+
+#define SIPHASH_SIPROUND /* One add-rotate-xor mixing round over the variables v0..v3, which must be in scope where the macro is expanded. */ \
+ do { \
+ v0 += v1; \
+ v1 = SIPHASH_ROTL(v1, 13); \
+ v1 ^= v0; \
+ v0 = SIPHASH_ROTL(v0, 32); \
+ v2 += v3; \
+ v3 = SIPHASH_ROTL(v3, 16); \
+ v3 ^= v2; \
+ v0 += v3; \
+ v3 = SIPHASH_ROTL(v3, 21); \
+ v3 ^= v0; \
+ v2 += v1; \
+ v1 = SIPHASH_ROTL(v1, 17); \
+ v1 ^= v2; \
+ v2 = SIPHASH_ROTL(v2, 32); \
+ } while (0) // The do/while(0) wrapper lets the macro be used as a single statement.
+
+template <int cROUNDS, int dROUNDS, class ResultTy> // cROUNDS: rounds run per 8-byte input word; dROUNDS: rounds run during finalization; ResultTy: 8- or 16-byte result.
+static inline ResultTy siphash(const uint8_t *in, uint64_t inlen, // Hashes the inlen bytes starting at `in` and returns the digest as a ResultTy.
+ const uint8_t (&k)[16]) { // k: 16-byte key, consumed as two little-endian 64-bit words.
+ static_assert(sizeof(ResultTy) == 8 || sizeof(ResultTy) == 16,
+ "result type should be uint64_t or uint128_t");
+ uint64_t v0 = 0x736f6d6570736575ULL; // v0..v3 are the four 64-bit state words, seeded with fixed constants.
+ uint64_t v1 = 0x646f72616e646f6dULL;
+ uint64_t v2 = 0x6c7967656e657261ULL;
+ uint64_t v3 = 0x7465646279746573ULL;
+ uint64_t b; // Final input word: trailing bytes plus the length byte; later reused for the output.
+ uint64_t k0 = SIPHASH_U8TO64_LE(k); // Low half of the key (bytes 0..7).
+ uint64_t k1 = SIPHASH_U8TO64_LE(k + 8); // High half of the key (bytes 8..15).
+ uint64_t m; // Current 8-byte input word.
+ int i;
+ const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); // One past the last complete 8-byte word.
+ const int left = inlen & 7; // Number of trailing bytes (0..7) that do not fill a whole word.
+ b = ((uint64_t)inlen) << 56; // Only the low 8 bits of the length survive, in the top byte of b.
+ v3 ^= k1; // Mix the key into the initial state.
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ if (sizeof(ResultTy) == 16) { // The 128-bit variant perturbs v1 so it diverges from the 64-bit one.
+ v1 ^= 0xee;
+ }
+
+ for (; in != end; in += 8) { // Absorb each complete 8-byte word.
+ m = SIPHASH_U8TO64_LE(in);
+ v3 ^= m;
+
+ for (i = 0; i < cROUNDS; ++i)
+ SIPHASH_SIPROUND;
+
+ v0 ^= m;
+ }
+
+ switch (left) { // Pack the trailing bytes into the low bytes of b, in[0] lowest; each case falls through to the next lower byte.
+ case 7:
+ b |= ((uint64_t)in[6]) << 48;
+ LLVM_FALLTHROUGH;
+ case 6:
+ b |= ((uint64_t)in[5]) << 40;
+ LLVM_FALLTHROUGH;
+ case 5:
+ b |= ((uint64_t)in[4]) << 32;
+ LLVM_FALLTHROUGH;
+ case 4:
+ b |= ((uint64_t)in[3]) << 24;
+ LLVM_FALLTHROUGH;
+ case 3:
+ b |= ((uint64_t)in[2]) << 16;
+ LLVM_FALLTHROUGH;
+ case 2:
+ b |= ((uint64_t)in[1]) << 8;
+ LLVM_FALLTHROUGH;
+ case 1:
+ b |= ((uint64_t)in[0]);
+ break;
+ case 0:
+ break;
+ }
+
+ v3 ^= b; // Absorb the final word exactly like a full input word.
+
+ for (i = 0; i < cROUNDS; ++i)
+ SIPHASH_SIPROUND;
+
+ v0 ^= b;
+
+ if (sizeof(ResultTy) == 8) { // The finalization constant depends on the output width.
+ v2 ^= 0xff;
+ } else {
+ v2 ^= 0xee;
+ }
+
+ for (i = 0; i < dROUNDS; ++i)
+ SIPHASH_SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3; // Fold the state into the first 64 output bits.
+
+ // This mess with the result type would be easier with 'if constexpr'.
+
+ uint64_t firstHalf = b;
+ if (sizeof(ResultTy) == 8)
+ return firstHalf; // A 64-bit result is complete here; the code below runs only for 16-byte results.
+
+ v1 ^= 0xdd; // Perturb the state and run the finalization rounds again for the second 64 bits.
+
+ for (i = 0; i < dROUNDS; ++i)
+ SIPHASH_SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+ uint64_t secondHalf = b;
+
+ return firstHalf | (ResultTy(secondHalf) << (sizeof(ResultTy) == 8 ? 0 : 64)); // firstHalf is the low 64 bits, secondHalf the high 64; the 0-shift arm is never reached at run time (see the early return) and only avoids a 64-bit shift by 64.
+}
+
+//===--- LLVM-specific wrappers around siphash.
+
+/// Compute an ABI-stable 64-bit hash of the given string: the bytes of \p Str are hashed by siphash<2, 4, uint64_t> under the fixed key K.
+uint64_t llvm::getPointerAuthStableSipHash64(StringRef Str) {
+ static const uint8_t K[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, 0x4a, 0x79, // Fixed 16-byte key; a different key would produce different hashes.
+ 0x6f, 0xec, 0x8b, 0x1b, 0x42, 0x87, 0x81, 0xd4};
+
+ // The aliasing is fine here because of omnipotent char (character-typed pointers may alias any object; presumably uint8_t is unsigned char on supported targets).
+ auto *Data = reinterpret_cast<const uint8_t *>(Str.data());
+ return siphash<2, 4, uint64_t>(Data, Str.size(), K); // 2 rounds per input word, 4 finalization rounds, 64-bit result.
+}
+
+/// Compute an ABI-stable 16-bit hash of the given string.
+uint64_t llvm::getPointerAuthStableSipHash16(StringRef Str) {
+ uint64_t RawHash = getPointerAuthStableSipHash64(Str);
+
+ // Produce a non-zero 16-bit discriminator.
+ uint64_t Discriminator = (RawHash % 0xFFFF) + 1;
----------------
kovdan01 wrote:
I'm sure that this scheme has already been used downstream for a long time, and that there is a strong reason to always produce a non-zero discriminator when computing it from a string, but let me mention a potential downside of this approach compared to simply doing `uint64_t Discriminator = RawHash % 0x10000;`.
If we assume that the 64-bit hash values are uniformly distributed when the hash function is applied to the infinite set of all possible strings (which should probably hold for a cryptographically secure hash), then the non-zero 16-bit values computed here are not uniformly distributed:
- 16-bit value 0: no corresponding 64-bit values
- 16-bit value 1: 281479271743490 corresponding 64-bit values
- 16-bit values 2..65535: 281479271743489 corresponding 64-bit values each
I suppose this might be OK; it's just not very consistent with the 64-bit hash computation, since we do not try to avoid a zero value there. I do get the point, though, that the chance of getting a zero 64-bit hash value is very low compared to the 16-bit case.
A final point: if this has been discussed with security researchers, I have no issue with an implementation that excludes a zero 16-bit value. If not, IMHO it would be better to consult security specialists and ask for their advice.
https://github.com/llvm/llvm-project/pull/93902
More information about the cfe-commits
mailing list