[libc-commits] [libc] [libc] add checksum for jmpbuf (PR #101110)
Schrodinger ZHU Yifan via libc-commits
libc-commits at lists.llvm.org
Tue Jul 30 14:30:40 PDT 2024
SchrodingerZhu wrote:
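Each run prints two timings in nanoseconds for 1,000,000 setjmp/longjmp round trips: the first line is the `test::` implementation (checksummed unless built with `-DNO_CHECK`), the second is the system `setjmp`/`longjmp` baseline.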
```
❯ clang++ -O3 test5.cc -DNO_CHECK
❯ ./a.out
2845100
5104636
❯ ./a.out
2723879
5370382
❯ clang++ -O3 test5.cc
❯ ./a.out
7721258
5307271
❯ ./a.out
7872398
5451505
```
The benchmark program:
```c++
namespace internal {
// Folded multiplication.
// This function multiplies two 64-bit integers and XORs together the high
// and low 64-bit halves of the 128-bit result.
inline __UINT64_TYPE__ folded_multiply(__UINT64_TYPE__ x, __UINT64_TYPE__ y) {
__uint128_t p = static_cast<__uint128_t>(x) * static_cast<__uint128_t>(y);
__UINT64_TYPE__ low = static_cast<__UINT64_TYPE__>(p);
__UINT64_TYPE__ high = static_cast<__UINT64_TYPE__>(p >> 64);
return low ^ high;
}
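// For example, folded_multiply(1ULL << 32, 1ULL << 32) returns 1: the
// 128-bit product is 1 << 64, so the low half is 0 and the high half is 1.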
// Read as little endian.
// A shift-and-or implementation does not generate satisfactory code on
// aarch64, so we use a union to read the value instead.
template <typename T> inline T read_little_endian(const void *ptr) {
const __UINT8_TYPE__ *bytes = static_cast<const __UINT8_TYPE__ *>(ptr);
union {
T value;
__UINT8_TYPE__ buffer[sizeof(T)];
} data;
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
// The compiler should be able to optimize this into a load followed by a
// byte swap.
// On aarch64 (-mbig-endian), this compiles to the following for int:
// ldr w0, [x0]
// rev w0, w0
// ret
for (__SIZE_TYPE__ i = 0; i < sizeof(T); ++i) {
data.buffer[i] = bytes[sizeof(T) - i - 1];
}
#else
for (__SIZE_TYPE__ i = 0; i < sizeof(T); ++i) {
data.buffer[i] = bytes[i];
}
#endif
return data.value;
}
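// For example, the byte sequence {0x01, 0x02, 0x03, 0x04} read as a
// __UINT32_TYPE__ yields 0x04030201 regardless of host endianness.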
// Specialized read function for small values; size must be <= 8.
inline void read_small_values(const void *ptr, __SIZE_TYPE__ size, __UINT64_TYPE__ &low,
__UINT64_TYPE__ &high) {
const __UINT8_TYPE__ *bytes = static_cast<const __UINT8_TYPE__ *>(ptr);
if (size >= 2) {
if (size >= 4) {
low = static_cast<__UINT64_TYPE__>(read_little_endian<__UINT32_TYPE__>(&bytes[0]));
high =
static_cast<__UINT64_TYPE__>(read_little_endian<__UINT32_TYPE__>(&bytes[size - 4]));
} else {
low = static_cast<__UINT64_TYPE__>(read_little_endian<__UINT16_TYPE__>(&bytes[0]));
high = static_cast<__UINT64_TYPE__>(bytes[size - 1]);
}
} else {
if (size > 0) {
low = static_cast<__UINT64_TYPE__>(bytes[0]);
high = static_cast<__UINT64_TYPE__>(bytes[0]);
} else {
low = 0;
high = 0;
}
}
}
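// For sizes 5 to 7 the two 32-bit reads overlap; e.g. with size == 5, low
// covers bytes [0, 4) and high covers bytes [1, 5), so every byte is hashed.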
// This constant comes from Knuth's PRNG (it empirically works well).
inline constexpr __UINT64_TYPE__ MULTIPLE = 6364136223846793005;
// Rotation amount for mixing.
inline constexpr __UINT64_TYPE__ ROTATE = 23;
// Randomly generated values. For now, we use the same values as aHash,
// since they have been widely tested.
// https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38
inline constexpr __UINT64_TYPE__ RANDOMNESS[2][4] = {
{0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
0x082efa98ec4e6c89},
{0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
0x3f84d5b5b5470917},
};
// This is a portable string hasher. It is not cryptographically secure.
// The quality of the hash is good enough to pass all tests in SMHasher.
// The implementation is derived from the generic routine of aHash.
class HashState {
__UINT64_TYPE__ buffer;
__UINT64_TYPE__ pad;
__UINT64_TYPE__ extra_keys[2];
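// Core mixing step: fold the two words with the extra keys, absorb the
// result into the buffer together with the pad, then rotate for diffusion.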
inline void update(__UINT64_TYPE__ low, __UINT64_TYPE__ high) {
__UINT64_TYPE__ combined =
folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
buffer = (buffer + pad) ^ combined;
buffer = __builtin_rotateleft64(buffer, ROTATE);
}
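// Run the seed through one round of the hash with fixed randomness to
// derive a better-distributed per-instance key.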
inline static __UINT64_TYPE__ mix(__UINT64_TYPE__ seed) {
HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
RANDOMNESS[0][3]};
mixer.update(seed, 0);
return mixer.finish();
}
public:
inline constexpr HashState(__UINT64_TYPE__ a, __UINT64_TYPE__ b, __UINT64_TYPE__ c,
__UINT64_TYPE__ d)
: buffer(a), pad(b), extra_keys{c, d} {}
inline HashState(__UINT64_TYPE__ seed) {
// Mix one more round of the seed to make it stronger.
__UINT64_TYPE__ mixed = mix(seed);
buffer = RANDOMNESS[1][0] ^ mixed;
pad = RANDOMNESS[1][1] ^ mixed;
extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
}
inline void update(const void *ptr, __SIZE_TYPE__ size) {
const __UINT8_TYPE__ *bytes = static_cast<const __UINT8_TYPE__ *>(ptr);
buffer = (buffer + size) * MULTIPLE;
__UINT64_TYPE__ low, high;
if (size > 8) {
if (size > 16) {
// Mix in the 16-byte tail first; the loop below then consumes the body in
// 16-byte chunks (the final chunk may overlap the tail).
low = read_little_endian<__UINT64_TYPE__>(&bytes[size - 16]);
high = read_little_endian<__UINT64_TYPE__>(&bytes[size - 8]);
update(low, high);
while (size > 16) {
low = read_little_endian<__UINT64_TYPE__>(&bytes[0]);
high = read_little_endian<__UINT64_TYPE__>(&bytes[8]);
update(low, high);
bytes += 16;
size -= 16;
}
} else {
low = read_little_endian<__UINT64_TYPE__>(&bytes[0]);
high = read_little_endian<__UINT64_TYPE__>(&bytes[size - 8]);
update(low, high);
}
} else {
read_small_values(ptr, size, low, high);
update(low, high);
}
}
inline __UINT64_TYPE__ finish() {
int rot = buffer & 63;
__UINT64_TYPE__ folded = folded_multiply(buffer, pad);
return __builtin_rotateleft64(folded, rot);
}
};
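// Usage sketch (illustrative only):
//   HashState state{seed};             // or the four-key constructor
//   state.update(data, len);           // absorb an arbitrary byte range
//   __UINT64_TYPE__ h = state.finish();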
} // namespace internal
#define offsetof(A, B) __builtin_offsetof(A, B)
typedef struct {
__UINT64_TYPE__ rbx;
__UINT64_TYPE__ rbp;
__UINT64_TYPE__ r12;
__UINT64_TYPE__ r13;
__UINT64_TYPE__ r14;
__UINT64_TYPE__ r15;
__UINTPTR_TYPE__ rsp;
__UINTPTR_TYPE__ rip;
__UINT64_TYPE__ __sigmask;
__UINT64_TYPE__ __has_sigmask : 1;
__UINT64_TYPE__ __unused : 63;
__UINT64_TYPE__ __chksum;
} my_jmp_buf;
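// Note: __chksum must remain the last field; update_checksum and verify
// below hash everything up to offsetof(my_jmp_buf, __chksum).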
namespace jmpbuf {
using HashState = internal::HashState;
// Initial values generated by
// https://www.random.org/cgi-bin/randbyte?nbytes=48&format=h
// These values are only used for overlay targets.
inline __UINT64_TYPE__ register_mangle_cookie = 0xdf8a883867040cbc;
inline __UINT64_TYPE__ checksum_mangle_cookie = 0x9ed4fe406ebe9cf9;
inline __UINT64_TYPE__ randomness[4] = {
0x83b9df7dddf5ab3d,
0x06c931cca75e15c6,
0x08280ec9e9a778bf,
0x111f67f4aafc9276,
};
inline int update_checksum(my_jmp_buf *buf) {
HashState state{
randomness[0],
randomness[1],
randomness[2],
randomness[3],
};
state.update(buf, offsetof(my_jmp_buf, __chksum));
buf->__chksum = state.finish() ^ checksum_mangle_cookie;
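// Return 0 so that setjmp can tail-call this function and still report 0
// on the direct (non-longjmp) return path.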
return 0;
}
inline void verify(const my_jmp_buf *buf) {
HashState state{
randomness[0],
randomness[1],
randomness[2],
randomness[3],
};
state.update(buf, offsetof(my_jmp_buf, __chksum));
auto chksum = state.finish() ^ checksum_mangle_cookie;
if (chksum != buf->__chksum) {
__builtin_trap();
}
}
} // namespace jmpbuf
namespace test {
[[gnu::naked]]
void longjmp(my_jmp_buf *buf, int val) {
#ifndef NO_CHECK
asm(R"(
pushq %%rbp
pushq %%rbx
mov %%rdi, %%rbp
mov %%esi, %%ebx
subq $8, %%rsp
call %P0
addq $8, %%rsp
mov %%ebx, %%esi
mov %%rbp, %%rdi
popq %%rbx
popq %%rbp
)" ::"i"(jmpbuf::verify)
: "rax", "rcx", "rdx", "r8", "r9", "r10", "r11");
#endif
register __UINT64_TYPE__ rcx __asm__("rcx");
// Load cookie
asm("mov %1, %0\n\t" : "=r"(rcx) : "m"(jmpbuf::register_mangle_cookie));
// load registers from buffer
// do not pass any invalid values into registers
#define RECOVER(REG) \
register __UINT64_TYPE__ REG __asm__(#REG); \
asm volatile("mov %c[" #REG "](%%rdi), %%rdx\n\t" \
"xor %%rdx, %1\n\t" \
"mov %%rdx, %0\n\t" \
: "=r"(REG) \
: "r"(rcx), [REG] "i"(offsetof(my_jmp_buf, REG)) \
: "rdx");
RECOVER(rbx);
RECOVER(rbp);
RECOVER(r12);
RECOVER(r13);
RECOVER(r14);
RECOVER(r15);
RECOVER(rsp);
register int eax __asm__("eax");
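// Compute the return value: xor/cmp/adc yields val, or 1 when val == 0.
// Then load the saved rip and jump to it (the xor again only touches rcx).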
asm volatile(R"(
xor %0,%0
cmp $1,%%esi
adc %%esi,%0
mov %c[rip](%%rdi),%%rdx
xor %%rdx, %%rcx
jmp *%%rdx
)"
: "=r"(eax)
: [rip] "i"(offsetof(my_jmp_buf, rip))
: "rdx");
}
[[gnu::naked]]
int setjmp(my_jmp_buf *buf) {
register __UINT64_TYPE__ rcx __asm__("rcx");
// Load cookie
asm("mov %1, %0\n\t" : "=r"(rcx) : "m"(jmpbuf::register_mangle_cookie));
// store registers to buffer
// do not pass any invalid values into registers
#define STORE(REG) \
asm("mov %%" #REG ", %%rdx\n\t" \
"xor %%rdx, %%rcx\n\t" \
"mov %%rdx, %c[" #REG \
"](%%rdi)\n\t" ::[REG] "i"(offsetof(my_jmp_buf, REG)) \
: "rdx");
STORE(rbx);
STORE(rbp);
STORE(r12);
STORE(r13);
STORE(r14);
STORE(r15);
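// Save the caller's stack pointer (rsp past the return address) and the
// return address itself as the resume rip.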
asm(R"(
lea 8(%%rsp),%%rdx
xor %%rdx, %%rcx
mov %%rdx,%c[rsp](%%rdi)
mov (%%rsp),%%rdx
xor %%rdx, %%rcx
mov %%rdx,%c[rip](%%rdi)
)" ::[rsp] "i"(offsetof(my_jmp_buf, rsp)),
[rip] "i"(offsetof(my_jmp_buf, rip))
: "rdx");
#ifndef NO_CHECK
// tail call to update checksum
asm("jmp %P0" : : "i"(jmpbuf::update_checksum));
#else
asm("xor %eax, %eax\n\tret\n\t");
#endif
}
} // namespace test
#include <setjmp.h>
#include <chrono>
#include <iostream>
int main() {
using namespace std::chrono;
{
auto x = high_resolution_clock::now();
#pragma push_macro("setjmp")
#undef setjmp
for (int i = 0; i < 1000000; ++i) {
my_jmp_buf buf;
if (test::setjmp(&buf))
continue;
test::longjmp(&buf, 0);
}
auto y = high_resolution_clock::now();
std::cout << duration_cast<nanoseconds>(y - x).count() << std::endl;
}
#pragma pop_macro("setjmp")
{
auto x = high_resolution_clock::now();
for (int i = 0; i < 1000000; ++i) {
jmp_buf buf;
if (::setjmp(buf))
continue;
::longjmp(buf, 0);
}
auto y = high_resolution_clock::now();
std::cout << duration_cast<nanoseconds>(y - x).count() << std::endl;
}
}
```
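Per iteration, that is roughly 2.7-2.9 ns without the checksum versus 7.7-7.9 ns with it, against about 5.1-5.5 ns for the system setjmp/longjmp.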
https://github.com/llvm/llvm-project/pull/101110