[llvm] [Support] Add SipHash-based 16-bit ptrauth stable hash. (PR #93902)

Ahmed Bougacha via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 14 17:14:10 PDT 2024


https://github.com/ahmedbougacha updated https://github.com/llvm/llvm-project/pull/93902

>From b0a19c3a167ea470e893404a77a90ed0bb968ec4 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed at bougacha.org>
Date: Fri, 14 Jun 2024 17:12:49 -0700
Subject: [PATCH] [Support] Add SipHash-based 16-bit ptrauth stable hash.

This finally wraps the now-lightly-modified SipHash C reference
implementation, for the main interface we need (16-bit ptrauth
discriminators).

The exact algorithm is the little-endian interpretation of the
non-doubled (i.e. 64-bit) result of applying a SipHash-2-4 using the
constant seed `b5d4c9eb79104a796fec8b1b428781d4` (big-endian), with the
result reduced by modulo to the range of non-zero discriminators (i.e.
`(rawHash % 65535) + 1`).

By "stable" we mean that the result of this hash algorithm will be the same
across different compiler versions and target platforms.

The 16-bit hashes are used extensively for the AArch64 ptrauth ABI,
because AArch64 can efficiently load a 16-bit immediate into the high
bits of a register without disturbing the remainder of the value, which
serves as a nice blend operation.

16 bits is also sufficiently compact to not inflate a loader relocation.
We disallow zero to guarantee a different discriminator from the places
in the ABI that use a constant zero.

Co-Authored-By: John McCall <rjmccall at apple.com>
---
 llvm/include/llvm/Support/SipHash.h    | 19 ++++++++++++++++
 llvm/lib/Support/SipHash.cpp           | 30 ++++++++++++++++++++++++++
 llvm/unittests/Support/SipHashTest.cpp | 20 ++++++++++++++++-
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/SipHash.h b/llvm/include/llvm/Support/SipHash.h
index 56dea80ef2e6b..ab05ff38769eb 100644
--- a/llvm/include/llvm/Support/SipHash.h
+++ b/llvm/include/llvm/Support/SipHash.h
@@ -9,6 +9,9 @@
 // An implementation of SipHash, a hash function optimized for speed on
 // short inputs. Based on the SipHash reference implementation.
 //
+// Also provides one specific wrapper on top of SipHash-2-4-64 to compute
+// ABI-stable ptrauth discriminators.
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_SUPPORT_SIPHASH_H
@@ -19,6 +22,7 @@
 namespace llvm {
 
 template <typename T> class ArrayRef;
+class StringRef;
 
 /// Computes a SipHash-2-4 64-bit result.
 void getSipHash_2_4_64(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
@@ -28,6 +32,21 @@ void getSipHash_2_4_64(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
 void getSipHash_2_4_128(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
                         uint8_t (&Out)[16]);
 
+/// Compute a stable non-zero 16-bit hash of the given string.
+///
+/// The exact algorithm is the little-endian interpretation of the
+/// non-doubled (i.e. 64-bit) result of applying a SipHash-2-4 using
+/// a specific seed value which can be found in the source.
+/// This 64-bit result is reduced to a non-zero 16-bit value (hash % 65535 + 1).
+///
+/// We use a 16-bit discriminator because ARM64 can efficiently load
+/// a 16-bit immediate into the high bits of a register without disturbing
+/// the remainder of the value, which serves as a nice blend operation.
+/// 16 bits is also sufficiently compact to not inflate a loader relocation.
+/// We disallow zero to guarantee a different discriminator from the places
+/// in the ABI that use a constant zero.
+uint16_t getPointerAuthStableSipHash(StringRef S);
+
 } // end namespace llvm
 
 #endif
diff --git a/llvm/lib/Support/SipHash.cpp b/llvm/lib/Support/SipHash.cpp
index ed1a305f0443f..68545913a4f59 100644
--- a/llvm/lib/Support/SipHash.cpp
+++ b/llvm/lib/Support/SipHash.cpp
@@ -5,16 +5,26 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+//
+//  This file implements an ABI-stable string hash based on SipHash, used to
+//  compute ptrauth discriminators.
+//
+//===----------------------------------------------------------------------===//
 
 #include "llvm/Support/SipHash.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include <cstdint>
 
 using namespace llvm;
 using namespace support;
 
+#define DEBUG_TYPE "llvm-siphash"
+
 // Lightly adapted from the SipHash reference C implementation:
 //   https://github.com/veorq/SipHash
 // by Jean-Philippe Aumasson and Daniel J. Bernstein
@@ -153,3 +163,23 @@ void llvm::getSipHash_2_4_128(ArrayRef<uint8_t> In, const uint8_t (&K)[16],
                               uint8_t (&Out)[16]) {
   siphash<2, 4>(In.data(), In.size(), K, Out);
 }
+
+/// Compute an ABI-stable, non-zero 16-bit hash of the given string.
+uint16_t llvm::getPointerAuthStableSipHash(StringRef Str) {
+  static const uint8_t K[16] = {0xb5, 0xd4, 0xc9, 0xeb, 0x79, 0x10, 0x4a, 0x79,
+                                0x6f, 0xec, 0x8b, 0x1b, 0x42, 0x87, 0x81, 0xd4};
+
+  uint8_t RawHashBytes[8];
+  getSipHash_2_4_64(arrayRefFromStringRef(Str), K, RawHashBytes);
+  uint64_t RawHash = endian::read64le(RawHashBytes);
+
+  // Reduce modulo 0xFFFF and add 1: the discriminator is in [1, 0xFFFF].
+  uint16_t Discriminator = (RawHash % 0xFFFF) + 1;
+  LLVM_DEBUG(
+      dbgs() << "ptrauth stable hash discriminator: " << utostr(Discriminator)
+             << " (0x"
+             << utohexstr(Discriminator, /*Lowercase=*/false, /*Width=*/4)
+             << ")"
+             << " of: " << Str << "\n");
+  return Discriminator;
+}
diff --git a/llvm/unittests/Support/SipHashTest.cpp b/llvm/unittests/Support/SipHashTest.cpp
index bd05c278fc7a2..7c557eb488acc 100644
--- a/llvm/unittests/Support/SipHashTest.cpp
+++ b/llvm/unittests/Support/SipHashTest.cpp
@@ -50,7 +50,25 @@ TEST(SipHashTest, SipHash_2_4_128) {
   }
 }
 
-// Below are the unmodified expected outputs from vectors.h
+// Tests for the ptrauth-specific SipHash wrapper.
+TEST(SipHashTest, PointerAuthSipHash) {
+  // Test some basic cases.
+  EXPECT_EQ(0xE793, getPointerAuthStableSipHash(""));
+  EXPECT_EQ(0xF468, getPointerAuthStableSipHash("strlen"));
+  EXPECT_EQ(0x2D15, getPointerAuthStableSipHash("_ZN1 ind; f"));
+
+  // Test some known strings that are already enshrined in the ABI.
+  EXPECT_EQ(0x6AE1, getPointerAuthStableSipHash("isa"));
+  EXPECT_EQ(0xB5AB, getPointerAuthStableSipHash("objc_class:superclass"));
+  EXPECT_EQ(0xC0BB, getPointerAuthStableSipHash("block_descriptor"));
+  EXPECT_EQ(0xC310, getPointerAuthStableSipHash("method_list_t"));
+
+  // Test the range limits, where we differ from naive 64-bit hash truncation.
+  EXPECT_EQ(1, getPointerAuthStableSipHash("_Zptrkvttf"));
+  EXPECT_EQ(0xFFFF, getPointerAuthStableSipHash("_Zaflhllod"));
+}
+
+// Below are the unmodified expected outputs from the reference vectors.h.
 
 const uint8_t ExpectedSipHash64[64][8] = {
     {



More information about the llvm-commits mailing list