[PATCH] D71082: Allow system header to provide their own implementation of some builtin

Manoj Gupta via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 31 20:23:02 PDT 2020


manojgupta added subscribers: nickdesaulniers, llozano, srhines.
manojgupta added a comment.

I was able to reduce it to the following:

  /* Local integer/size aliases; the snippet includes no headers, so it declares its own. */
  typedef unsigned int u32;
  typedef unsigned long long u64;
  typedef unsigned long size_t;
  
  /*
   * Fortify failure hooks. They are only declared here (no definitions in
   * this snippet). fortify_panic() is marked noreturn; __read_overflow() is
   * declared but not called anywhere in this snippet.
   */
  void fortify_panic(const char *name) __attribute__((noreturn)) ;
  void __read_overflow(void) ;
  void __read_overflow2(void) ;
  void __write_overflow(void) ;
  
  /*
   * External declarations of memcpy and __memcpy. memcpy is additionally
   * given an inline definition further down; __memcpy is not referenced
   * again in this snippet.
   */
  extern void *memcpy(void *to, const void *from, size_t len);
  extern void *__memcpy(void *to, const void *from, size_t len);
  
  /*
   * Inline definition of memcpy that adds object-size checks before
   * forwarding to __builtin_memcpy.
   *
   * Declared "extern inline" with gnu_inline and always_inline; per GCC's
   * documented gnu_inline semantics such a definition is used only for
   * inlining and is not emitted as a standalone function.
   *
   * p:    destination pointer
   * q:    source pointer
   * size: number of bytes to copy
   *
   * Returns the value returned by __builtin_memcpy(p, q, size).
   */
  extern inline __attribute__((unused)) __attribute__((no_instrument_function)) __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *p, const void *q, size_t size)
  {
   /*
    * Object sizes as seen by the compiler (type 0). Per the GCC/Clang builtin
    * documentation, type 0 yields (size_t)-1 when the size cannot be
    * determined, in which case none of the "< size" checks below can fire.
    */
   size_t p_size = __builtin_object_size(p, 0);
   size_t q_size = __builtin_object_size(q, 0);
   /* Only when size is a compile-time constant: call the overflow hooks. */
   if (__builtin_constant_p(size)) {
    if (p_size < size)
     __write_overflow();
    if (q_size < size)
     __read_overflow2();
   }
   /* General check: either buffer too small -> fortify_panic (declared noreturn). */
   if (p_size < size || q_size < size)
    fortify_panic(__func__);
   /* Checks passed: perform the copy through the builtin. */
   return __builtin_memcpy(p, q, size);
  }
  
  /*
   * Copies count bytes from src to dst by calling memcpy.
   *
   * src is taken as "const volatile void *", but the volatile qualifier is
   * cast away before the call, so the copy goes through the ordinary memcpy
   * path. The value returned by memcpy is discarded.
   */
  static inline __attribute__((unused)) __attribute__((no_instrument_function)) void
  memcpy_fromio(void *dst, const volatile void *src, size_t count)
  {
   memcpy(dst, (const void *)src, count);
  }
  
  /*
   * Reads sizeof(u64) bytes starting at addr + offset into a local u64 via
   * memcpy_fromio and returns that value.
   *
   * addr:   base pointer
   * offset: offset added to addr
   */
  u64 sst_shim32_read64(void *addr, u32 offset)
  {
   u64 val;
   /* addr + offset is arithmetic on a void pointer (a GNU C extension, byte-granular). */
   memcpy_fromio(&val, addr + offset, sizeof(val));
   return val;
  }

Compiling with

  clang -Qunused-arguments -D_FORTIFY_SOURCE=2 -fno-omit-frame-pointer -fno-stack-protector  -nostdinc  -fno-strict-aliasing -fno-common  -std=gnu89 -fno-PIE -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -mno-80387 -mstack-alignment=8 -mtune=generic -mno-red-zone -mcmodel=kernel -funit-at-a-time  -pipe  -mretpoline-external-thunk -fno-delete-null-pointer-checks  -Os -fstack-protector-strong -mno-global-merge -no-integrated-as  -fno-omit-frame-pointer -fno-optimize-sibling-calls -ftrivial-auto-var-init=pattern  -pg -mfentry  -fno-strict-overflow -fno-merge-all-constants -fno-stack-check  -c -o test.o test.c -target x86_64-cros-linux-gnu

and inspecting the result with objdump -drW test.o:

Before:

No call to memcpy is emitted, as clang is able to optimize it away.

  0000000000000000 <sst_shim32_read64>:
     0:	e8 00 00 00 00       	callq  5 <sst_shim32_read64+0x5>
  			1: R_X86_64_PLT32	__fentry__-0x4
     5:	55                   	push   %rbp
     6:	48 89 e5             	mov    %rsp,%rbp
     9:	89 f0                	mov    %esi,%eax
     b:	48 8b 04 07          	mov    (%rdi,%rax,1),%rax
     f:	5d                   	pop    %rbp
    10:	c3                   	retq 

After:

A call to memcpy is emitted.

  0000000000000000 <sst_shim32_read64>:
     0:	e8 00 00 00 00       	callq  5 <sst_shim32_read64+0x5>
  			1: R_X86_64_PLT32	__fentry__-0x4
     5:	55                   	push   %rbp
     6:	48 89 e5             	mov    %rsp,%rbp
     9:	53                   	push   %rbx
     a:	48 83 ec 10          	sub    $0x10,%rsp
     e:	65 48 8b 04 25 28 00 	mov    %gs:0x28,%rax
    15:	00 00 
    17:	48 89 45 f0          	mov    %rax,-0x10(%rbp)
    1b:	48 b8 aa aa aa aa aa 	movabs $0xaaaaaaaaaaaaaaaa,%rax
    22:	aa aa aa 
    25:	48 8d 5d e8          	lea    -0x18(%rbp),%rbx
    29:	48 89 03             	mov    %rax,(%rbx)
    2c:	89 f6                	mov    %esi,%esi
    2e:	48 01 fe             	add    %rdi,%rsi
    31:	ba 08 00 00 00       	mov    $0x8,%edx
    36:	48 89 df             	mov    %rbx,%rdi
    39:	e8 00 00 00 00       	callq  3e <sst_shim32_read64+0x3e>
  			3a: R_X86_64_PLT32	memcpy-0x4
    3e:	48 8b 03             	mov    (%rbx),%rax
    41:	65 48 8b 0c 25 28 00 	mov    %gs:0x28,%rcx
    48:	00 00 
    4a:	48 3b 4d f0          	cmp    -0x10(%rbp),%rcx
    4e:	75 07                	jne    57 <sst_shim32_read64+0x57>
    50:	48 83 c4 10          	add    $0x10,%rsp
    54:	5b                   	pop    %rbx
    55:	5d                   	pop    %rbp
    56:	c3                   	retq   
    57:	e8 00 00 00 00       	callq  5c <sst_shim32_read64+0x5c>
  			58: R_X86_64_PLT32	__stack_chk_fail-0x4

At this point, it is not clear to me whether clang is doing anything wrong here, or whether this is a bug in kernel 4.4, which uses a regular memcpy for IO memory, where a regular memcpy cannot be used.
And so I suspect we need to backport https://github.com/torvalds/linux/commit/c2327da06b33d8e1093ce2c28f395bc500d1b0d3 to older kernel versions.

@nickdesaulniers wdyt?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71082/new/

https://reviews.llvm.org/D71082





More information about the cfe-commits mailing list