<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/121604">121604</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            LLVM inlining a function produces worse code than inlining it manually
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            missed-optimization
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          Kmeakin
      </td>
    </tr>
</table>

<pre>
    https://godbolt.org/z/jdzErPMn7


```rust
use std::ops::RangeTo;

pub fn get_range_to(s: &str, range: RangeTo<usize>) -> Option<&str> {
    if is_char_boundary(s, range.end) {
        unsafe { Some(s.get_unchecked(..range.end)) }
    } else {
        None
 }
}

fn is_char_boundary(s: &str, index: usize) -> bool {
    let bytes = s.as_bytes();
    let len = bytes.len();

    if index == 0 {
 return true;
    }

    if index >= len {
        index == len
    } else {
        is_utf8_start_byte(bytes[index])
    }
}

/// Returns `true` when `b` is not a UTF-8 continuation byte, i.e. when `b`
/// lies outside `0x80..=0xBF`.
fn is_utf8_start_byte(b: u8) -> bool {
    // Reinterpreted as `i8`, the bytes 128..=191 map to -128..=-65, so a
    // single signed comparison is equivalent to: b < 128 || b >= 192
    let signed = b as i8;
    signed >= -0x40
}
```

This function from the Rust standard library (https://github.com/rust-lang/rust/blob/3f43b1a636738f41c48df073c5bcb97a97bf8459/library/core/src/str/traits.rs#L430) produces unnecessarily branchy code:

```asm
; AArch64
example::get_range_to::hecca4baa46cebf55:
        cbz     x2, .LBB0_4
        cmp     x2, x1
        b.hs    .LBB0_5
        ldrsb   w8, [x0, x2]
        cmn     w8, #64
        b.ge    .LBB0_4
.LBB0_3:
        mov     x0, xzr
.LBB0_4:
        mov     x1, x2
        ret
.LBB0_5:
        b.ne    .LBB0_3
        b       .LBB0_4


; x86_64
example::get_range_to::haafeb3d39ebfa3da:
        mov     rax, rdi
        test    rdx, rdx
        je      .LBB0_5
        cmp     rdx, rsi
        jae     .LBB0_2
        cmp     byte ptr [rax + rdx], -64
        jge     .LBB0_5
.LBB0_4:
        xor     eax, eax
.LBB0_5:
        ret
.LBB0_2:
        jne     .LBB0_4
        jmp     .LBB0_5
```


By inlining `is_char_boundary` manually, we can get much nicer code with fewer branches:
```rust
pub fn get_range_to_inlined(s: &str, range: RangeTo<usize>) -> Option<&str> {
    let end = range.end;
    let bytes = s.as_bytes();
    let len = bytes.len();

    if end == 0 {
 unsafe { return Some(s.get_unchecked(..end)) }
    }

    if end >= len {
        if end == len {
            unsafe { return Some(s.get_unchecked(..end)) }
        }
        return None;
    }

 let last = bytes[end];
    if !is_utf8_start_byte(last) {
        return None;
    }

    unsafe { Some(s.get_unchecked(..end)) }
}
```

```asm
; AArch64
example::get_range_to_inlined::hef86872e440018c7:
        cbz     x2, .LBB1_4
        cmp     x2, x1
        b.hs    .LBB1_3
        ldrsb   w8, [x0, x2]
        mov     x1, x2
        cmn     w8, #65
        csel    x0, x0, xzr, gt
        ret
.LBB1_3:
        csel    x0, x0, xzr, eq
.LBB1_4:
        mov     x1, x2
        ret

; x86_64
example::get_range_to_inlined::hf2fa833068e78843:
        mov     rax, rdi
        test    rdx, rdx
        je      .LBB1_3
        xor     ecx, ecx
        cmp     rdx, rsi
        jae     .LBB1_2
        cmp     byte ptr [rax + rdx], -64
        cmovl   rax, rcx
        ret
.LBB1_2:
        cmp     rdx, rsi
        cmovne  rax, rcx
.LBB1_3:
        ret
```
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJy0V1-TojoW_zTxJdUWJIDw4IPa7cv27G7NTu2rlYSDpAeCk4Ru7U9_KwEF1OnpmbrXsozknPM7f5KcX2DGyL0CWKJ4jeLHGWtt2ejlv2pg36Wa8SY_LUtrDwbRFSJbRLb7JudNZeeN3iOyfUdk-5K_P-n_flELFKz6bxJ0X90ai4JVawAbmzsMumo6MLr6ytQevjWIrjurQ8txofAe7E470c42iKROGSOSGKsR2WAvcVMX601r5Dsg-oRIhh8QfcL_OVjZKEQ3vRl9wmjhnGCMsSywNDtRMr3jTatypk_eyxl7Dip3SIOF-7TKsALcJP5fU4OzmLtAWyVKEN8hRySdz8cAHcZjj4EWjxgqA1ew_24UuMdOsf8NVoW6H-OkElLlcHRTXf7n5HnTVCMvFVjMTxYMRvQRmzkzO_-ISOqCpGO9CpTX8grzCtRYaVQ-59gpOt3g7EuDbbXCVrcwoF4yujJ9cqbe3aQcE2Tn_8PiSbNrbZHujGXa-qwQSbvk4nVXnfjRxT-JZVrjOwC-pOnP6tkdAvytlAZLg7m0uGZ7KTD8aOUrq0BZbBuHwTGiGxySFKPFBi02fsInHmbkgpZyzAyW3l8vfgiOUTCEej5LXdjecdEq4bY4LnRTY1sC_toai41lbq_kuJJcM31y8FeHV9qy5XPR1Ihs3eF8qJja9_8R2fKq4YhsaRFRHrKEJguaFlEoojQvggUVMRc8W7BswYs0ijNEtr0rRLai0YDI1mjhft0e3VrNpDVzbRChzxENXJIH3eStAINbpUCAMUzL6oS5ZkqUJyya3B3vqz7CTO0e6RqvVlqUSYSCFRxZfaig6yWTpuFnShCCRZyxKBHAizjuUM-bR_B3Px6JO0vz5_U62EVjeX0YyY_hSMTnpXFjZxSPJFWuDccYv6XOCMXrY-CtiduIY2zlx16N0CSawO9hgHeS7h_tE6ib1y6yDvtdXzSiaYoXxbAPYhBpsBerc2H4XI380nFE_ThE1H_pGh_TZPep5WCsAE5zmgEvGM3Z_Vg1O_pOnEsntGCsn8372ePI5AXGUcV31u5sZjzYC4ORPrmj7zoAPljtVk6zI0Zk7TFcF9ngh8kqvexh5Plu_Y-N9iN0ObnhuubjdSBT6xcFV0W_SPpwB_dXPQIFq_UJS1VJJdUeoyS4YZMkwDVTLauqk4vtDbBgnnpx3YoSKylA-7OI36QtcQFvoPszCqY_n1ckf4e-dz4GT45_N407vgKVe74aaJf-g7TXe5uQ3uha0PPfB7eDn9wL7jj5GT1OgriV4-lF5c8iGqIaNQuH4-8qd5jdl5AZO9QQxWuHHD8O6rLAiIR3CdfZ3l64fuH001ey6xTvkuqf8cxld_d8U6RJuiAQRUEQpmJxyzcD14R_wjXhpCl3XPMLpvmAAm5JaNJCDVRnhrnwDNngvb3LIuGFncb2A0mNIeDHxeh3CevTnHO1NAUpWEppkKSwSNOI3rqd8k7_-TT9TFfG9X0QXc8Xx1_y0hlvRE_hDT39DjWJunmtRlw6iWG8ZmfG-TAuh-aoaII2XfN-cc6HaJYvaZ7RjM1gGS5onNEsCsisXJIoDmIhojBOcpoEPMxICCLOeEaDOE3JTC5JQOIgDCgJCSXhXGR5kiQkzOkiKbKwQFEANZPVvKpea_cKOpPGtLAMSZgE0axiHCrj32UJqaUxkD80Bytr-c48lxB3PmZ66cwfeLs3KAoqaawZAK20FSyfn___ZWBQNty4L9fXt0Yb6CjSlkwNyv6NoKPWWaur69fn8Q3cOe2Hh4NuXkC4S7hPySCy7bN6XZK_AgAA___0fkth">