<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/100898">100898</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Poor compilation of libc++ string comparison
</td>
</tr>
<tr>
<th>Labels</th>
<td>
libc++
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
bmaurer
</td>
</tr>
</table>
<pre>
Equality comparison on libc++'s std::string compiles to suboptimal machine code.
```
#include <string>
// Reproducer 1: compares two std::string objects with operator== and returns
// the result. Both operands are passed by reference, so neither length is
// known at compile time.
bool eqdynamic(const std::string& foo, const std::string& bar) {
return foo == bar;
}
// Reproducer 2: compares foo against the 3-character string literal "aaa".
bool eqshort(const std::string& foo) { return foo == "aaa"; }
// Reproducer 3: compares foo against a 48-character string literal (48 is the
// length the generated assembly for this function compares against).
bool eqlong(const std::string& foo) {
return foo == "asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdf";
}
```
https://godbolt.org/z/xEWx59fns
* In the "eqdynamic" case, there is lots of branching to handle the combinations of short and long strings. The code might be simplified by first testing whether the two inputs have the same "shortness" bit, and then handling the differing case by normalizing the arguments so that the "short" input is the first input. If both strings are found to be short, it may be worth loading 8 bytes of each string and comparing the length and first 8 chars at the same time.
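* In the "eqshort" case, once the string is found to be short, the first 4 bytes can be loaded and compared with [0x6, 'a', 'a', 'a'] (0x6 is the first byte of a short string of length 3, since the length is stored shifted left by one bit).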
* In the "eqlong" case, we check whether a "short" string can be 48 chars long, which is not possible; this branch could be eliminated statically.
```
# Generated code for eqdynamic. rdi and rsi are presumably the addresses of
# foo and bar (System V x86-64 argument registers -- confirm for the target).
# Bit 0 of each object's first byte selects between two representations:
#   bit set   -> length is the qword at offset 8, data pointer at offset 16
#   bit clear -> length is (first byte >> 1), characters start at offset 1
# That bit is tested for each string once to get the length and again to get
# the data pointer, giving the four-way branching the report describes.
eqdynamic(...):
# al = first byte of the first string; branch on its bit 0.
movzx eax, byte ptr [rdi]
test al, 1
je .LBB0_1
# First string, bit set: rdx = length from offset 8.
mov rdx, qword ptr [rdi + 8]
# cl = first byte of the second string; branch on its bit 0.
movzx ecx, byte ptr [rsi]
test cl, 1
jne .LBB0_5
.LBB0_4:
# Second string, bit clear: r8 = first byte >> 1; compare the two lengths.
mov r8d, ecx
shr r8d
cmp rdx, r8
je .LBB0_8
.LBB0_7:
# Lengths differ: return false.
xor eax, eax
ret
.LBB0_1:
# First string, bit clear: rdx = first byte >> 1, then test the second string.
mov edx, eax
shr edx
movzx ecx, byte ptr [rsi]
test cl, 1
je .LBB0_4
.LBB0_5:
# Second string, bit set: r8 = length from offset 8; compare the two lengths.
mov r8, qword ptr [rsi + 8]
cmp rdx, r8
jne .LBB0_7
.LBB0_8:
# Lengths are equal. Re-test the same two bits to choose each data pointer.
test al, 1
je .LBB0_9
# First string, bit set: data pointer loaded from offset 16.
mov rdi, qword ptr [rdi + 16]
test cl, 1
jne .LBB0_13
.LBB0_12:
# Second string, bit clear: data starts one byte into the object.
inc rsi
jmp .LBB0_14
.LBB0_9:
# First string, bit clear: data starts one byte into the object.
inc rdi
test cl, 1
je .LBB0_12
.LBB0_13:
# Second string, bit set: data pointer loaded from offset 16.
mov rsi, qword ptr [rsi + 16]
.LBB0_14:
# Call bcmp with rdi/rsi = the two data pointers and rdx = the common length;
# return true when it yields 0. The push/add pair adjusts the stack around the
# call (presumably to keep it 16-byte aligned).
push rax
call bcmp@PLT
test eax, eax
sete al
add rsp, 8
ret
# Generated code for eqshort. rdi is presumably the address of foo (System V
# x86-64 first-argument register -- confirm for the target). Bit 0 of the
# first byte selects the representation: set -> length at offset 8 and data
# pointer at offset 16; clear -> length is (first byte >> 1) and the
# characters start at offset 1.
eqshort(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&):
# al = first byte of the string; branch on its bit 0.
movzx eax, byte ptr [rdi]
test al, 1
je .LBB1_1
# Bit set: length from offset 8 must equal 3.
mov rcx, qword ptr [rdi + 8]
cmp rcx, 3
je .LBB1_6
.LBB1_4:
# Length is not 3: return false.
xor eax, eax
ret
.LBB1_1:
# Bit clear: length is (first byte >> 1) and must equal 3.
mov ecx, eax
shr ecx
cmp rcx, 3
jne .LBB1_4
.LBB1_6:
# Length is 3. Test the same bit again to choose the data pointer.
test al, 1
je .LBB1_7
mov rdi, qword ptr [rdi + 16]
jmp .LBB1_9
.LBB1_7:
inc rdi
.LBB1_9:
# Compare three bytes without calling bcmp: a 16-bit load XORed with 24929
# (0x6161, i.e. "aa") and a third byte XORed with 97 ('a'). The OR of the two
# results is zero only when all three bytes match; sete returns that.
movzx eax, word ptr [rdi]
xor eax, 24929
movzx ecx, byte ptr [rdi + 2]
xor ecx, 97
or cx, ax
sete al
ret
.LCPI2_0:
.byte 97
...
.LCPI2_1:
.byte 97
...
# Generated code for eqlong. rdi is presumably the address of foo (System V
# x86-64 first-argument register -- confirm for the target). The structure
# matches eqshort, with 48 as the required length. The bit-clear path at
# .LBB2_1 still compares (first byte >> 1) against 48, which is the check the
# report says can never succeed for a short string.
eqlong(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&):
# al = first byte of the string; branch on its bit 0.
movzx eax, byte ptr [rdi]
test al, 1
je .LBB2_1
# Bit set: length from offset 8 must equal 48.
mov rcx, qword ptr [rdi + 8]
cmp rcx, 48
je .LBB2_6
.LBB2_4:
# Length is not 48: return false.
xor eax, eax
ret
.LBB2_1:
# Bit clear: length is (first byte >> 1) and is compared against 48.
mov ecx, eax
shr ecx
cmp rcx, 48
jne .LBB2_4
.LBB2_6:
# Length is 48. Test the same bit again to choose the data pointer.
test al, 1
je .LBB2_7
mov rdi, qword ptr [rdi + 16]
jmp .LBB2_9
.LBB2_7:
inc rdi
.LBB2_9:
# Vectorized 48-byte compare: load 32 bytes and then 16 bytes of string data,
# XOR them with the constants at .LCPI2_0 / .LCPI2_1 (presumably the bytes of
# the literal; the listing elides most of them), and OR the results together.
# vptest sets ZF when the combined value is all zero, and sete returns that.
vmovdqu ymm0, ymmword ptr [rdi]
vmovdqu xmm1, xmmword ptr [rdi + 32]
vpxor ymm0, ymm0, ymmword ptr [rip + .LCPI2_0]
vpxor ymm1, ymm1, ymmword ptr [rip + .LCPI2_1]
vpor ymm0, ymm0, ymm1
vptest ymm0, ymm0
sete al
vzeroupper
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzkWMtvo7oa_2uczacTgSEJWWTR9CEdaRazONJdVsY2wXMNZmyTJv3rr4yB8krTGd1ZnSoNTb7X73v9aiDGiFPJ-QFtjmjztCK1zZU-pAWpNderVLHrQYqUInxsXjsDxjIUPaDowVgtyhOceMk1sdyAqVNVWVEQCQWhuSg5UMX4GgVPKHho37dB-_IfcSRKKmvGAUWP3iOKnocWqVIS-E92LUkhKMIJVaWxUxgIbyFTCuFHuClPiUZ4D2h39J4BADS3tS6dKaDoCUVPjVbUaqDd0wIUkytt7wJpAi34RxgTQhDGKDrCcgCpnJ-v-P8sERfIsOxXfj2qae6TljXvubWVcbjwC8IvJ8VSJe1a6RPCL-8Iv1ye_3PZ7LPSjJqPH-DvEmzOHbi-oxgoMdy1zuZccxAGpLIGVAapJqWbpBNYBTkpmeSNOVVFKkpihSobvaYlQEoGrnbgS2XW8E_uZxAKccotpByMKCopMsEZpFew3FjnXWSNW1FWtTWQk7MPY0jRQG3cl9wYBzYVPpLNeekxNfhyDkxkGW-WwiXk_JdKF0SK906D6FNd8NIaMApsTmxXjHamsIfgSuAEmdDG-q_WDmOqbN4lB0RzyFTtkKgmM-_iEYSFglzdV29K2xykIswBSCC9uj1VGXBCO0dNLlQVFdEdSsnLk80bgUeQAM2JNtDibcpiRdGv9qiv_YJ89FWVtC2oDynMMvKPnOMWKyWl03ApcDaAyhm8CZsD2hyDy9bZIrwjCO9u_Ll5WobqV80jhTcONOf0v67UZNSVFneLJu7q4a0f4S0XNHdZlcpCpYwRqWwHWph2iIGqWjJnzqUo3PByBsYSKyiR8voZSw7Zb71eI7x3m9evvvsp1Pn9AgCcXFxcVzyorHb10Uz06Xfqbu7dlUinHY6FP7i_rr8dj8FrOAsEmjVBfr4pzQZRAOEjJLNYPTQ6h2YG0DpMdBFTyQeYNl7oP8RLxWiuOmHOlQs8kptc9_KRgBaVF_gEdTIrjA-ZDOPvZvEvyvtvm-EuXq65HVqGN5FzNrGcIHdyL_hqdSeNXy5yl188RLn5pL7zMTC3xsDV9lZdR83dDYMns-C_MLr7G7CZuDm-4fa3BjKMRn3FM9SipOD6MfbRjltrNar6fMWdiw7-VwCOtxiPAEaLPXUAbzV0UJge7tRHVZu8QTgdW8dwkNKiQnHw_ds_yw2dbUs389zytuGj7wljoE3lbCbz1K9ZR5_dia0_S7363YseUmIEfe0Ono-O1p3DuaKTvFpNhDWdXvS8rEqkVJRYpQeK0bM_lyK8HdH3_5m3XWvCJcpuykJ_hbZ7LvRW0e3pCl-3H4MRLvDx1_gw_IwP6R0-7Cn-DuzB1oZDmnM5LDLNXZoJO8KalfuLNNP5HJBB2FFXF2CRTjoe6CzunAkmOGYAXJdaVRzv8Zw8b_6faZPCiy4H7dtPKtVKwUvv7v1krdffHr__jV-DWd7rBhz04dyRaWgxGLMPVQCYaPf3Yf8K1sB_hDXi-QmqH3I8pA3827SB_wRtzHAPeAMPeQP_Nm_gP8AbeMgb-Cu8gRd441yoM_tZw7UoAofkWhT3uKMzuRRF6EwuM5MGfjSniHPl2zwIthhUVI2Hfuk_8RO29uF9P-GHn3Plx20BSDgN5RdqpDmnrul55fzOtaqriutbp5Xurm_FDhHbR3uy4odwh3EY4iQIVvmBxdss22ySbZwEQZxs0oBEIUvofhvsYxrsV-KAAxwHO7zDQRwEwToN9pgHJKMR44SEexQHvCBCrqU8F2ulTythTM0PYRAk-2QlScqlaZ4GYjx88Ocat9IHZ_VXWp8MigMpjDUffqywkh--K6WbW3Qhm6czoDL48NPfR_vHDUaVq1rLw-RhkrB5na6pKhB-ce7by1-VVj84tQi_NJgNwi8t7PMB_y8AAP__
-76sfw">