[cfe-dev] "Optimized implementations"?
Stefan Kanthak via cfe-dev
cfe-dev at lists.llvm.org
Sun Sep 6 06:25:17 PDT 2020
<https://compiler-rt.llvm.org/index.html> boasts:
| The builtins library provides optimized implementations of this
| and other low-level routines, either in target-independent C form,
| or as a heavily-optimized assembly.
Really?
Left: poorly performing code shipped in # Right: slightly improved code,
clang_rt.builtins-* # which the optimiser REALLY
# should have generated
# ___cmpdi2 -- three-way comparison of two signed 64-bit integers.
# Operands as read from the stack: a = [esp+8]:[esp+4] (high:low dword),
# b = [esp+16]:[esp+12] (high:low dword); presumably the first and second
# cdecl stack arguments -- confirm against the caller.
# Result in eax: 0 if a < b, 1 if a == b, 2 if a > b.
# Both columns compare the high dwords as signed values (jl/jg) and, only
# when those are equal, the low dwords as unsigned values (jb/ja + carry).
# Left column:  tests a against b, then repeats the low-dword compare with
#               swapped operands so that `adc eax, 0` can add the carry to 1.
# Right column: tests b against a throughout, so the carry of the single
#               low-dword compare is exactly (a.lo > b.lo) and `adc eax, 1`
#               turns the zeroed eax into 1 or 2 directly.
___cmpdi2:
mov ecx, [esp+16] # mov ecx, [esp+16]
xor eax, eax # xor eax, eax
cmp [esp+8], ecx # cmp ecx, [esp+8]
jl @f # jg @f
mov eax, 2 # mov eax, 2
jg @f # jl @f
mov ecx, [esp+4] #
mov edx, [esp+12] # mov ecx, [esp+12]
mov eax, 0 # xor eax, eax
cmp ecx, edx # cmp ecx, [esp+4]
jb @f # ja @f
cmp edx, ecx #
mov eax, 1 #
adc eax, 0 # adc eax, 1
@@: # @@:
ret # ret
# 3 fewer instructions, 10 bytes saved
# ___ucmpdi2 -- three-way comparison of two unsigned 64-bit integers.
# Operands as read from the stack: a = [esp+8]:[esp+4] (high:low dword),
# b = [esp+16]:[esp+12] (high:low dword); presumably the first and second
# cdecl stack arguments -- confirm against the caller.
# Result in eax: 0 if a < b, 1 if a == b, 2 if a > b.
# Both columns compare the high dwords first and, only when those are equal,
# the low dwords; every conditional jump is an unsigned one (jb/ja).
# Left column:  tests a against b, then repeats the low-dword compare with
#               swapped operands so that `adc eax, 0` can add the carry to 1.
# Right column: tests b against a throughout, so the carry of the single
#               low-dword compare is exactly (a.lo > b.lo) and `adc eax, 1`
#               turns the zeroed eax into 1 or 2 directly.
___ucmpdi2:
mov ecx, [esp+16] # mov ecx, [esp+16]
xor eax, eax # xor eax, eax
cmp [esp+8], ecx # cmp ecx, [esp+8]
jb @f # ja @f
mov eax, 2 # mov eax, 2
ja @f # jb @f
mov ecx, [esp+4] #
mov edx, [esp+12] # mov ecx, [esp+12]
mov eax, 0 # xor eax, eax
cmp ecx, edx # cmp ecx, [esp+4]
jb @f # ja @f
cmp edx, ecx #
mov eax, 1 #
adc eax, 0 # adc eax, 1
@@: # @@:
ret # ret
# 3 fewer instructions, 10 bytes saved
Now properly written code, of course branch-free, faster and shorter:
# Copyright (C) 2004-2020, Stefan Kanthak <stefan.kanthak at nexgo.de>
# ___cmpdi2 (branch-free) -- three-way comparison of two signed 64-bit integers.
# Operands as read from the stack: a = [esp+8]:[esp+4] (high:low dword),
# b = [esp+16]:[esp+12] (high:low dword).
# Result in eax: 0 if a < b, 1 if a == b, 2 if a > b.
# Writes eax, ecx, edx and the flags; stores nothing to memory.
# Method: perform the 64-bit subtraction in both directions (cmp + sbb),
# capture each signed "less than" outcome with setl, and combine the two.
___cmpdi2:
mov ecx, [esp+4]        # ecx = a.lo
mov edx, [esp+12]       # edx = b.lo
cmp ecx, edx            # CF = borrow out of a.lo - b.lo
mov eax, [esp+8]        # eax = a.hi (mov leaves the borrow in CF intact)
sbb eax, [esp+16]       # finishes 64-bit a - b; only the flags are needed
setl ah                 # ah = 1 if a < b (signed), else 0
cmp edx, ecx            # CF = borrow out of b.lo - a.lo
mov edx, [esp+16]       # edx = b.hi
sbb edx, [esp+8]        # finishes 64-bit b - a; only the flags are needed
setl al                 # al = 1 if b < a (signed), else 0; ah is untouched
sub al, ah              # al = (a > b) - (a < b), i.e. -1, 0 or +1
movsx eax, al           # sign-extend the byte result to all of eax
inc eax                 # shift range -1..+1 to the 0..2 return convention
ret
# ___ucmpdi2 (branch-free) -- three-way comparison of two unsigned 64-bit integers.
# Operands as read from the stack: a = [esp+8]:[esp+4] (high:low dword),
# b = [esp+16]:[esp+12] (high:low dword).
# Result in eax: 0 if a < b, 1 if a == b, 2 if a > b.
# Writes eax, ecx, edx and the flags; stores nothing to memory.
# Method: the final borrow of a 64-bit subtraction is the unsigned "less
# than" outcome, so eax = 1 - (a < b) + (a > b) is built from two borrows.
___ucmpdi2:
mov ecx, [esp+4]        # ecx = a.lo
mov edx, [esp+12]       # edx = b.lo
cmp ecx, edx            # CF = borrow out of a.lo - b.lo
mov eax, [esp+8]        # eax = a.hi (mov leaves the borrow in CF intact)
sbb eax, [esp+16]       # finishes 64-bit a - b; CF = 1 iff a < b
sbb eax, eax            # eax = -CF: -1 if a < b, else 0
cmp edx, ecx            # CF = borrow out of b.lo - a.lo
mov edx, [esp+16]       # edx = b.hi
sbb edx, [esp+8]        # finishes 64-bit b - a; CF = 1 iff a > b
adc eax, 1              # eax = -(a < b) + 1 + (a > b), i.e. 0, 1 or 2
ret
AGAIN:
Remove every occurrence of the word "optimized" on the above web page.
'nuff said
Stefan
More information about the cfe-dev
mailing list