<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/121604>121604</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
LLVM inlining a function produces worse code than inlining it manually
</td>
</tr>
<tr>
<th>Labels</th>
<td>
missed-optimization
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Kmeakin
</td>
</tr>
</table>
<pre>
https://godbolt.org/z/jdzErPMn7
```rust
use std::ops::RangeTo;
/// Returns the prefix of `s` that ends at byte offset `range.end`, or `None`
/// when `is_char_boundary` rejects that offset.
///
/// This is the variant that leaves inlining of `is_char_boundary` to the
/// compiler.
pub fn get_range_to(s: &str, range: RangeTo<usize>) -> Option<&str> {
if is_char_boundary(s, range.end) {
// SAFETY: `is_char_boundary` returned true, so `range.end` is 0, equals
// `s.len()`, or is below `s.len()` and indexes a byte that starts a UTF-8
// sequence; in every case `..range.end` is in bounds and on a boundary.
unsafe { Some(s.get_unchecked(..range.end)) }
} else {
None
}
}
/// Reports whether `index` is an acceptable split point in `s`: offset 0,
/// offset `s.len()`, or the offset of a byte accepted by `is_utf8_start_byte`.
///
/// Offsets greater than `s.len()` yield `false`; the function never panics.
fn is_char_boundary(s: &str, index: usize) -> bool {
let bytes = s.as_bytes();
let len = bytes.len();
// Offset 0 is accepted unconditionally, which also covers the empty string.
if index == 0 {
return true;
}
if index >= len {
// At or past the end: only the exact end of the string is a boundary.
index == len
} else {
// `index < len` on this path, so the indexing cannot go out of bounds.
is_utf8_start_byte(bytes[index])
}
}
/// Returns `true` unless `b` lies in `128..=191`, the range of UTF-8
/// continuation bytes (`0b10xx_xxxx`).
fn is_utf8_start_byte(b: u8) -> bool {
// This is bit magic equivalent to: b < 128 || b >= 192
// Reinterpreted as `i8`, bytes 128..=191 become -128..=-65, while every
// other byte maps to a value >= -64 (-0x40), so one signed compare suffices.
(b as i8) >= -0x40
}
```
The `get_range_to` function above, adapted from the Rust standard library (https://github.com/rust-lang/rust/blob/3f43b1a636738f41c48df073c5bcb97a97bf8459/library/core/src/str/traits.rs#L430), produces unnecessarily branchy code:
```asm
; AArch64
example::get_range_to::hecca4baa46cebf55:
cbz x2, .LBB0_4
cmp x2, x1
b.hs .LBB0_5
ldrsb w8, [x0, x2]
cmn w8, #64
b.ge .LBB0_4
.LBB0_3:
mov x0, xzr
.LBB0_4:
mov x1, x2
ret
.LBB0_5:
b.ne .LBB0_3
b .LBB0_4
; x86_64
example::get_range_to::haafeb3d39ebfa3da:
mov rax, rdi
test rdx, rdx
je .LBB0_5
cmp rdx, rsi
jae .LBB0_2
cmp byte ptr [rax + rdx], -64
jge .LBB0_5
.LBB0_4:
xor eax, eax
.LBB0_5:
ret
.LBB0_2:
jne .LBB0_4
jmp .LBB0_5
```
By inlining `is_char_boundary` manually, we can get much nicer code with fewer branches:
```rust
/// Returns the prefix of `s` that ends at byte offset `range.end`, or `None`
/// if that offset is past the end of `s` or falls inside a UTF-8 sequence.
///
/// Performs the same checks as `get_range_to`, but with the boundary test
/// written out by hand so that each outcome returns directly.
pub fn get_range_to_inlined(s: &str, range: RangeTo<usize>) -> Option<&str> {
let end = range.end;
let bytes = s.as_bytes();
let len = bytes.len();
// Offset 0 is always a boundary; the result is the empty prefix.
if end == 0 {
// SAFETY: `..0` is in bounds and on a boundary for every `&str`.
unsafe { return Some(s.get_unchecked(..end)) }
}
if end >= len {
// At or past the end: only the exact end is valid (the whole string).
if end == len {
// SAFETY: `end == s.len()`, so `..end` covers exactly the whole string.
unsafe { return Some(s.get_unchecked(..end)) }
}
return None;
}
// `end < len` here, so the indexing is in bounds. Despite its name, `last`
// is the byte AT offset `end`, i.e. the first byte after the returned prefix.
let last = bytes[end];
if !is_utf8_start_byte(last) {
return None;
}
// SAFETY: `end < s.len()` and the byte at `end` starts a UTF-8 sequence, so
// `..end` is in bounds and ends on a boundary.
unsafe { Some(s.get_unchecked(..end)) }
}
```
```asm
; AArch64
example::get_range_to_inlined::hef86872e440018c7:
cbz x2, .LBB1_4
cmp x2, x1
b.hs .LBB1_3
ldrsb w8, [x0, x2]
mov x1, x2
cmn w8, #65
csel x0, x0, xzr, gt
ret
.LBB1_3:
csel x0, x0, xzr, eq
.LBB1_4:
mov x1, x2
ret
; x86_64
example::get_range_to_inlined::hf2fa833068e78843:
mov rax, rdi
test rdx, rdx
je .LBB1_3
xor ecx, ecx
cmp rdx, rsi
jae .LBB1_2
cmp byte ptr [rax + rdx], -64
cmovl rax, rcx
ret
.LBB1_2:
cmp rdx, rsi
cmovne rax, rcx
.LBB1_3:
ret
```
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJy0V1-TojoW_zTxJdUWJIDw4IPa7cv27G7NTu2rlYSDpAeCk4Ru7U9_KwEF1OnpmbrXsozknPM7f5KcX2DGyL0CWKJ4jeLHGWtt2ejlv2pg36Wa8SY_LUtrDwbRFSJbRLb7JudNZeeN3iOyfUdk-5K_P-n_flELFKz6bxJ0X90ai4JVawAbmzsMumo6MLr6ytQevjWIrjurQ8txofAe7E470c42iKROGSOSGKsR2WAvcVMX601r5Dsg-oRIhh8QfcL_OVjZKEQ3vRl9wmjhnGCMsSywNDtRMr3jTatypk_eyxl7Dip3SIOF-7TKsALcJP5fU4OzmLtAWyVKEN8hRySdz8cAHcZjj4EWjxgqA1ew_24UuMdOsf8NVoW6H-OkElLlcHRTXf7n5HnTVCMvFVjMTxYMRvQRmzkzO_-ISOqCpGO9CpTX8grzCtRYaVQ-59gpOt3g7EuDbbXCVrcwoF4yujJ9cqbe3aQcE2Tn_8PiSbNrbZHujGXa-qwQSbvk4nVXnfjRxT-JZVrjOwC-pOnP6tkdAvytlAZLg7m0uGZ7KTD8aOUrq0BZbBuHwTGiGxySFKPFBi02fsInHmbkgpZyzAyW3l8vfgiOUTCEej5LXdjecdEq4bY4LnRTY1sC_toai41lbq_kuJJcM31y8FeHV9qy5XPR1Ihs3eF8qJja9_8R2fKq4YhsaRFRHrKEJguaFlEoojQvggUVMRc8W7BswYs0ijNEtr0rRLai0YDI1mjhft0e3VrNpDVzbRChzxENXJIH3eStAINbpUCAMUzL6oS5ZkqUJyya3B3vqz7CTO0e6RqvVlqUSYSCFRxZfaig6yWTpuFnShCCRZyxKBHAizjuUM-bR_B3Px6JO0vz5_U62EVjeX0YyY_hSMTnpXFjZxSPJFWuDccYv6XOCMXrY-CtiduIY2zlx16N0CSawO9hgHeS7h_tE6ib1y6yDvtdXzSiaYoXxbAPYhBpsBerc2H4XI380nFE_ThE1H_pGh_TZPep5WCsAE5zmgEvGM3Z_Vg1O_pOnEsntGCsn8372ePI5AXGUcV31u5sZjzYC4ORPrmj7zoAPljtVk6zI0Zk7TFcF9ngh8kqvexh5Plu_Y-N9iN0ObnhuubjdSBT6xcFV0W_SPpwB_dXPQIFq_UJS1VJJdUeoyS4YZMkwDVTLauqk4vtDbBgnnpx3YoSKylA-7OI36QtcQFvoPszCqY_n1ckf4e-dz4GT45_N407vgKVe74aaJf-g7TXe5uQ3uha0PPfB7eDn9wL7jj5GT1OgriV4-lF5c8iGqIaNQuH4-8qd5jdl5AZO9QQxWuHHD8O6rLAiIR3CdfZ3l64fuH001ey6xTvkuqf8cxld_d8U6RJuiAQRUEQpmJxyzcD14R_wjXhpCl3XPMLpvmAAm5JaNJCDVRnhrnwDNngvb3LIuGFncb2A0mNIeDHxeh3CevTnHO1NAUpWEppkKSwSNOI3rqd8k7_-TT9TFfG9X0QXc8Xx1_y0hlvRE_hDT39DjWJunmtRlw6iWG8ZmfG-TAuh-aoaII2XfN-cc6HaJYvaZ7RjM1gGS5onNEsCsisXJIoDmIhojBOcpoEPMxICCLOeEaDOE3JTC5JQOIgDCgJCSXhXGR5kiQkzOkiKbKwQFEANZPVvKpea_cKOpPGtLAMSZgE0axiHCrj32UJqaUxkD80Bytr-c48lxB3PmZ66cwfeLs3KAoqaawZAK20FSyfn___ZWBQNty4L9fXt0Yb6CjSlkwNyv6NoKPWWaur69fn8Q3cOe2Hh4NuXkC4S7hPySCy7bN6XZK_AgAA___0fkth">