[compiler-rt] [compiler-rt] Use __atomic builtins whenever possible (PR #84439)

Alexander Richardson via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 15 16:03:26 PDT 2024


arichardson wrote:

> Hi Alexander,
> 
> Thanks for working on this.
> 
> Please double-check that codegen for the following tsan functions in release builds don't change: __tsan_read1/2/4/8, __tsan_write1/2/4/8, __tsan_func_entry/exit. Please post diff of asm for these functions, if any.
> 
> If we are refacatoring this, can we simplify code more today? I guess Vistual Studio does not support these buitlins, so we can't just switch to them. But perhaps we could switch to std::atomic? If yes, I would assume we still keep the atomic operation wrapper functions to minimize the diff.
> 
> Also clang-format CI check complains, please re-format the code.

On x86 I don't see any difference for __tsan_read*/__tsan_write* in the x86 assembly (other than the relative addresses) __tsan_func_entry/exit.

I do see one large diff. Code before:
```
<_ZN11__sanitizer23InternalAllocatorUnlockEv>:
               	pushq	%rax
               	callq	0x3cc00 <_ZN11__sanitizer18internal_allocatorEv>
               	movb	$0x0, 0x17be73(%rip)    # 0x1b8f80 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x81000>
               	xorl	%eax, %eax
               	leaq	0xfae6a(%rip), %rcx     # 0x137f80 <_ZN11__sanitizerL26internal_alloc_placeholderE>
               	nopw	%cs:(%rax,%rax)
               	movb	$0x0, 0x40d80(%rax,%rcx)
               	addq	$-0x40, %rax
               	cmpq	$-0xd80, %rax           # imm = 0xF280
               	jne	0x3d120 <_ZN11__sanitizer23InternalAllocatorUnlockEv+0x20>
               	movb	$0x0, 0x17be87(%rip)    # 0x1b8fc2 <_ZN11__sanitizerL27internal_allocator_cache_muE>
               	popq	%rax
               	retq
               	nopl	(%rax)
```
After has a lot of extra stores:
```
<_ZN11__sanitizer23InternalAllocatorUnlockEv>:
               	pushq	%rax
               	callq	0x3cc00 <_ZN11__sanitizer18internal_allocatorEv>
               	movb	$0x0, 0x17bcb3(%rip)    # 0x1b8f80 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x81000>
               	movb	$0x0, 0x13ba2c(%rip)    # 0x178d00 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40d80>
               	movb	$0x0, 0x13b9e5(%rip)    # 0x178cc0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40d40>
               	movb	$0x0, 0x13b99e(%rip)    # 0x178c80 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40d00>
               	movb	$0x0, 0x13b957(%rip)    # 0x178c40 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40cc0>
               	movb	$0x0, 0x13b910(%rip)    # 0x178c00 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40c80>
               	movb	$0x0, 0x13b8c9(%rip)    # 0x178bc0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40c40>
               	movb	$0x0, 0x13b882(%rip)    # 0x178b80 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40c00>
               	movb	$0x0, 0x13b83b(%rip)    # 0x178b40 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40bc0>
               	movb	$0x0, 0x13b7f4(%rip)    # 0x178b00 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40b80>
               	movb	$0x0, 0x13b7ad(%rip)    # 0x178ac0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40b40>
               	movb	$0x0, 0x13b766(%rip)    # 0x178a80 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40b00>
               	movb	$0x0, 0x13b71f(%rip)    # 0x178a40 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40ac0>
               	movb	$0x0, 0x13b6d8(%rip)    # 0x178a00 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40a80>
               	movb	$0x0, 0x13b691(%rip)    # 0x1789c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40a40>
               	movb	$0x0, 0x13b64a(%rip)    # 0x178980 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40a00>
               	movb	$0x0, 0x13b603(%rip)    # 0x178940 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x409c0>
               	movb	$0x0, 0x13b5bc(%rip)    # 0x178900 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40980>
               	movb	$0x0, 0x13b575(%rip)    # 0x1788c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40940>
               	movb	$0x0, 0x13b52e(%rip)    # 0x178880 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40900>
               	movb	$0x0, 0x13b4e7(%rip)    # 0x178840 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x408c0>
               	movb	$0x0, 0x13b4a0(%rip)    # 0x178800 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40880>
               	movb	$0x0, 0x13b459(%rip)    # 0x1787c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40840>
               	movb	$0x0, 0x13b412(%rip)    # 0x178780 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40800>
               	movb	$0x0, 0x13b3cb(%rip)    # 0x178740 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x407c0>
               	movb	$0x0, 0x13b384(%rip)    # 0x178700 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40780>
               	movb	$0x0, 0x13b33d(%rip)    # 0x1786c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40740>
               	movb	$0x0, 0x13b2f6(%rip)    # 0x178680 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40700>
               	movb	$0x0, 0x13b2af(%rip)    # 0x178640 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x406c0>
               	movb	$0x0, 0x13b268(%rip)    # 0x178600 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40680>
               	movb	$0x0, 0x13b221(%rip)    # 0x1785c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40640>
               	movb	$0x0, 0x13b1da(%rip)    # 0x178580 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40600>
               	movb	$0x0, 0x13b193(%rip)    # 0x178540 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x405c0>
               	movb	$0x0, 0x13b14c(%rip)    # 0x178500 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40580>
               	movb	$0x0, 0x13b105(%rip)    # 0x1784c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40540>
               	movb	$0x0, 0x13b0be(%rip)    # 0x178480 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40500>
               	movb	$0x0, 0x13b077(%rip)    # 0x178440 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x404c0>
               	movb	$0x0, 0x13b030(%rip)    # 0x178400 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40480>
               	movb	$0x0, 0x13afe9(%rip)    # 0x1783c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40440>
               	movb	$0x0, 0x13afa2(%rip)    # 0x178380 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40400>
               	movb	$0x0, 0x13af5b(%rip)    # 0x178340 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x403c0>
               	movb	$0x0, 0x13af14(%rip)    # 0x178300 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40380>
               	movb	$0x0, 0x13aecd(%rip)    # 0x1782c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40340>
               	movb	$0x0, 0x13ae86(%rip)    # 0x178280 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40300>
               	movb	$0x0, 0x13ae3f(%rip)    # 0x178240 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x402c0>
               	movb	$0x0, 0x13adf8(%rip)    # 0x178200 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40280>
               	movb	$0x0, 0x13adb1(%rip)    # 0x1781c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40240>
               	movb	$0x0, 0x13ad6a(%rip)    # 0x178180 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40200>
               	movb	$0x0, 0x13ad23(%rip)    # 0x178140 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x401c0>
               	movb	$0x0, 0x13acdc(%rip)    # 0x178100 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40180>
               	movb	$0x0, 0x13ac95(%rip)    # 0x1780c0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40140>
               	movb	$0x0, 0x13ac4e(%rip)    # 0x178080 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40100>
               	movb	$0x0, 0x13ac07(%rip)    # 0x178040 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x400c0>
               	movb	$0x0, 0x13abc0(%rip)    # 0x178000 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40080>
               	movb	$0x0, 0x13ab79(%rip)    # 0x177fc0 <_ZN11__sanitizerL26internal_alloc_placeholderE+0x40040>
               	movb	$0x0, 0x17bb74(%rip)    # 0x1b8fc2 <_ZN11__sanitizerL27internal_allocator_cache_muE>
               	popq	%rax
               	retq
```
It looks like the store mo_release loop was unrolled here if I read this correctly.
I have very limited understanding of x86 ASM so this is guessing based on knowing arm/aarch64/risc-v/mips instructions.

https://github.com/llvm/llvm-project/pull/84439


More information about the llvm-commits mailing list