[PATCH] D71082: Allow system header to provide their own implementation of some builtin
Manoj Gupta via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 31 20:23:02 PDT 2020
manojgupta added subscribers: nickdesaulniers, llozano, srhines.
manojgupta added a comment.
I was able to reduce it to the following:
/* Local stand-ins for the integer and size types used by the code below. */
typedef unsigned int u32;
typedef unsigned long long u64;
typedef unsigned long size_t;
/*
 * Helpers called by the fortified memcpy() wrapper below. Only prototypes
 * are given here; no definitions are part of this reduced test case.
 * fortify_panic() is declared noreturn. __read_overflow() is declared but
 * not referenced anywhere in this snippet.
 */
void fortify_panic(const char *name) __attribute__((noreturn)) ;
void __read_overflow(void) ;
void __read_overflow2(void) ;
void __write_overflow(void) ;
/*
 * Plain external prototypes for memcpy() and __memcpy(). The inline
 * definition of memcpy() that follows has the same signature; __memcpy()
 * is declared but never called in this snippet.
 */
extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
/*
 * Fortified memcpy() wrapper: an extern inline definition marked
 * always_inline and gnu_inline that bounds-checks the copy and then
 * forwards to __builtin_memcpy().
 *
 * NOTE(review): per the GCC documentation for gnu_inline, an extern inline
 * definition like this is presumably used only for inlining, with any
 * out-of-line call going to an external memcpy -- confirm against the docs.
 *
 * p:    destination buffer
 * q:    source buffer
 * size: number of bytes to copy
 *
 * Returns the value returned by __builtin_memcpy(p, q, size).
 */
extern inline __attribute__((unused)) __attribute__((no_instrument_function)) __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *p, const void *q, size_t size)
{
/* Object sizes of destination and source as reported by the compiler (mode 0). */
size_t p_size = __builtin_object_size(p, 0);
size_t q_size = __builtin_object_size(q, 0);
/*
 * When size is a compile-time constant, reference the undefined
 * __write_overflow()/__read_overflow2() helpers if the copy would exceed
 * the destination or source object respectively.
 */
if (__builtin_constant_p(size)) {
if (p_size < size)
__write_overflow();
if (q_size < size)
__read_overflow2();
}
/* Same bounds check for the general case: call the noreturn fortify_panic() with this function's name. */
if (p_size < size || q_size < size)
fortify_panic(__func__);
/* Checks passed: perform the actual copy through the builtin. */
return __builtin_memcpy(p, q, size);
}
/*
 * Copy count bytes from src to dst by calling memcpy().
 *
 * dst:   destination buffer
 * src:   source pointer, volatile-qualified in the signature
 * count: number of bytes to copy
 *
 * The volatile qualifier on src is cast away before the call, so the copy
 * is an ordinary memcpy() as far as the compiler is concerned. The return
 * value of memcpy() is discarded.
 */
static inline __attribute__((unused)) __attribute__((no_instrument_function)) void
memcpy_fromio(void *dst, const volatile void *src, size_t count)
{
memcpy(dst, (const void *)src, count);
}
/*
 * Read a u64 located offset bytes past addr and return it.
 *
 * addr:   base address
 * offset: byte offset added to addr
 *
 * The value is copied into a local through memcpy_fromio() with a constant
 * length of sizeof(val), so the whole chain ends in the memcpy() wrapper
 * with a compile-time-constant size.
 */
u64 sst_shim32_read64(void *addr, u32 offset)
{
u64 val;
/* addr + offset is arithmetic on a void pointer, which relies on the GNU C extension. */
memcpy_fromio(&val, addr + offset, sizeof(val));
return val;
}
Compiling with
clang -Qunused-arguments -D_FORTIFY_SOURCE=2 -fno-omit-frame-pointer -fno-stack-protector -nostdinc -fno-strict-aliasing -fno-common -std=gnu89 -fno-PIE -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -mno-80387 -mstack-alignment=8 -mtune=generic -mno-red-zone -mcmodel=kernel -funit-at-a-time -pipe -mretpoline-external-thunk -fno-delete-null-pointer-checks -Os -fstack-protector-strong -mno-global-merge -no-integrated-as -fno-omit-frame-pointer -fno-optimize-sibling-calls -ftrivial-auto-var-init=pattern -pg -mfentry -fno-strict-overflow -fno-merge-all-constants -fno-stack-check -c -o test.o test.c -target x86_64-cros-linux-gnu
and objdump -drW test.o
Before:
No call to memcpy is emitted, as clang is able to optimize it away.
0000000000000000 <sst_shim32_read64>:
0: e8 00 00 00 00 callq 5 <sst_shim32_read64+0x5>
1: R_X86_64_PLT32 __fentry__-0x4
5: 55 push %rbp
6: 48 89 e5 mov %rsp,%rbp
9: 89 f0 mov %esi,%eax
b: 48 8b 04 07 mov (%rdi,%rax,1),%rax
f: 5d pop %rbp
10: c3 retq
After:
A call to memcpy is emitted.
0000000000000000 <sst_shim32_read64>:
0: e8 00 00 00 00 callq 5 <sst_shim32_read64+0x5>
1: R_X86_64_PLT32 __fentry__-0x4
5: 55 push %rbp
6: 48 89 e5 mov %rsp,%rbp
9: 53 push %rbx
a: 48 83 ec 10 sub $0x10,%rsp
e: 65 48 8b 04 25 28 00 mov %gs:0x28,%rax
15: 00 00
17: 48 89 45 f0 mov %rax,-0x10(%rbp)
1b: 48 b8 aa aa aa aa aa movabs $0xaaaaaaaaaaaaaaaa,%rax
22: aa aa aa
25: 48 8d 5d e8 lea -0x18(%rbp),%rbx
29: 48 89 03 mov %rax,(%rbx)
2c: 89 f6 mov %esi,%esi
2e: 48 01 fe add %rdi,%rsi
31: ba 08 00 00 00 mov $0x8,%edx
36: 48 89 df mov %rbx,%rdi
39: e8 00 00 00 00 callq 3e <sst_shim32_read64+0x3e>
3a: R_X86_64_PLT32 memcpy-0x4
3e: 48 8b 03 mov (%rbx),%rax
41: 65 48 8b 0c 25 28 00 mov %gs:0x28,%rcx
48: 00 00
4a: 48 3b 4d f0 cmp -0x10(%rbp),%rcx
4e: 75 07 jne 57 <sst_shim32_read64+0x57>
50: 48 83 c4 10 add $0x10,%rsp
54: 5b pop %rbx
55: 5d pop %rbp
56: c3 retq
57: e8 00 00 00 00 callq 5c <sst_shim32_read64+0x5c>
58: R_X86_64_PLT32 __stack_chk_fail-0x4
At this point, it is not clear to me whether clang is doing anything wrong here, or whether this is a bug in kernel 4.4, which uses a regular memcpy for IO memory, where a regular memcpy cannot be used.
And so I suspect we need to backport https://github.com/torvalds/linux/commit/c2327da06b33d8e1093ce2c28f395bc500d1b0d3 to older kernel versions.
@nickdesaulniers wdyt?
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D71082/new/
https://reviews.llvm.org/D71082
More information about the cfe-commits
mailing list