<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/115158>115158</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Missed optimization: `udiv` and `urem` ignore assumptions that could help choose between 'DIV r/m32' and 'DIV r/m64' on x86-64.
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          zheland
      </td>
    </tr>
</table>

<pre>
    Currently `udiv i64 i32` compiles to pseudocode like: `if dividend <= u32::MAX { DIV r/m32 } else { DIV r/m64 }`.
That's great because `DIV r/m32` is typically faster than `DIV r/m64`.
However, in some cases LLVM could compile directly to the optimal variant using specified assumptions.

Examples:
- `udiv i64 i32` should use `DIV r/m64` if the compiler provides the assumption that the dividend won't fit into a 32-bit register.
- `udiv i64 i32` should use `DIV r/m32` if the compiler provides the assumption that the quotient will fit into a 32-bit register.

The same applies to all `udiv`, `sdiv`, `urem`, `srem`, and `div_rem` operations.
The same applies to `udiv i32 i16` when choosing between 'DIV r/m16' and 'DIV r/m32' (on x86).
The same applies to `udiv i64 i64` and `udiv i128 i64` when choosing between 'DIV r/m64' and (`__udivti3` and `__umodti3`) (see https://github.com/llvm/llvm-project/issues/6769).

As far as I understand, it is problematic to perform this optimization in the language compiler (Clang or Rust) because LLVM does not provide asymmetric division instructions with different dividend, divisor, and quotient types.

### Example in C

https://godbolt.org/z/j3c9KP7cx

```c
#include <stdint.h>
#define uint128_t __uint128_t

// Optimized.
// Baseline case: 64-bit dividend, 32-bit divisor, no extra assumptions.
// The divisor is widened to 64 bits and the quotient is truncated to 32 bits.
uint32_t div_u64_u32_default(uint64_t dividend, uint32_t divisor) {
    const uint64_t wide_divisor = divisor;
    const uint64_t quotient = dividend / wide_divisor;
    return (uint32_t)quotient;
}

// Optimized.
// Same division, with the assumption that the dividend fits in 32 bits.
// A dividend above UINT32_MAX reaches __builtin_unreachable().
uint32_t div_u64_u32_if_div_le_u32_max(uint64_t dividend, uint32_t divisor) {
    if (dividend > (uint64_t)UINT32_MAX) {
        __builtin_unreachable();
    }
    return (uint32_t)(dividend / (uint64_t)divisor);
}

// Missed optimization.
// Same division, with the assumption that the dividend does NOT fit in
// 32 bits. A dividend of UINT32_MAX or less reaches __builtin_unreachable().
uint32_t div_u64_u32_if_div_gt_u32_max(uint64_t dividend, uint32_t divisor) {
    if (dividend <= (uint64_t)UINT32_MAX) {
        __builtin_unreachable();
    }
    return (uint32_t)(dividend / (uint64_t)divisor);
}

// Missed optimization.
// Same division, with the assumption that the upper 32 bits of the dividend
// are smaller than the divisor, so the quotient fits in 32 bits.
// Otherwise control reaches __builtin_unreachable().
uint32_t div_u64_u32_if_quot_le_u32_max(uint64_t dividend, uint32_t divisor) {
    const uint32_t upper_half = (uint32_t)(dividend >> 32);
    if (upper_half >= divisor) {
        __builtin_unreachable();
    }
    return (uint32_t)(dividend / (uint64_t)divisor);
}

// Optimized.
// Baseline case: 128-bit dividend, 64-bit divisor, no extra assumptions.
// The divisor is widened to 128 bits and the quotient is truncated to 64 bits.
uint64_t div_u128_u64_default(uint128_t dividend, uint64_t divisor) {
    const uint128_t wide_divisor = divisor;
    const uint128_t quotient = dividend / wide_divisor;
    return (uint64_t)quotient;
}

// Optimized.
// 128-bit dividend, 64-bit divisor, with the assumption that the dividend
// fits in 64 bits. A larger dividend reaches __builtin_unreachable().
uint64_t div_u128_u64_if_div_le_u64_max(uint128_t dividend, uint64_t divisor) {
    // The bound is the full 64-bit range, as the function name states.
    if (dividend <= (uint128_t)UINT64_MAX) {
        return (uint64_t)(dividend / (uint128_t)divisor);
    } else {
        __builtin_unreachable();
    }
}

// Missed optimization.
// 128-bit dividend, 64-bit divisor, with the assumption that the upper
// 64 bits of the dividend are smaller than the divisor, so the quotient
// fits in 64 bits. Otherwise control reaches __builtin_unreachable().
uint64_t div_u128_u64_if_quot_le_u64_max(uint128_t dividend, uint64_t divisor) {
    const uint64_t upper_half = (uint64_t)(dividend >> 64);
    if (upper_half >= divisor) {
        __builtin_unreachable();
    }
    return (uint64_t)(dividend / (uint128_t)divisor);
}
```

<details>
<summary><h3>Similar example in Rust (click to expand)</h3></summary>

https://godbolt.org/z/3qd6qMd7s

```rust
use std::hint::assert_unchecked;

// Optimized.
// Baseline case: u64 dividend, u32 divisor; only a nonzero divisor is assumed.
#[no_mangle]
pub fn div_u64_u32_default(dividend: u64, divisor: u32) -> u32 {
    // SAFETY: callers must pass a nonzero divisor.
    unsafe { assert_unchecked(divisor != 0) };
    let quotient = dividend / u64::from(divisor);
    quotient as u32
}

// Optimized.
// Same division, with the extra assumption that the dividend fits in a u32.
#[no_mangle]
pub fn div_u64_u32_if_div_le_u32_max(dividend: u64, divisor: u32) -> u32 {
    // SAFETY: callers must pass a nonzero divisor and a dividend no greater
    // than u32::MAX.
    unsafe {
        assert_unchecked(divisor != 0);
        assert_unchecked(dividend <= u64::from(u32::MAX));
    }
    let quotient = dividend / u64::from(divisor);
    quotient as u32
}

// Missed optimization.
// Same division, with the extra assumption that the dividend does NOT fit
// in a u32.
#[no_mangle]
pub fn div_u64_u32_if_div_gt_u32_max(dividend: u64, divisor: u32) -> u32 {
    // SAFETY: callers must pass a nonzero divisor and a dividend greater
    // than u32::MAX.
    unsafe {
        assert_unchecked(divisor != 0);
        assert_unchecked(dividend > u64::from(u32::MAX));
    }
    let quotient = dividend / u64::from(divisor);
    quotient as u32
}

// Missed optimization.
// Same division, with the extra assumption that the upper 32 bits of the
// dividend are smaller than the divisor, so the quotient fits in a u32.
#[no_mangle]
pub fn div_u64_u32_if_quot_le_u32_max(dividend: u64, divisor: u32) -> u32 {
    let upper_half = (dividend >> 32) as u32;
    // SAFETY: callers must pass a nonzero divisor that is greater than the
    // upper 32 bits of the dividend.
    unsafe {
        assert_unchecked(divisor != 0);
        assert_unchecked(upper_half < divisor);
    }
    let quotient = dividend / u64::from(divisor);
    quotient as u32
}

// Optimized.
// Baseline case: u128 dividend, u64 divisor; only a nonzero divisor is assumed.
#[no_mangle]
pub fn div_u128_u64_default(dividend: u128, divisor: u64) -> u64 {
    // SAFETY: callers must pass a nonzero divisor.
    unsafe { assert_unchecked(divisor != 0) };
    let quotient = dividend / u128::from(divisor);
    quotient as u64
}

// Optimized.
// Same division, with the extra assumption that the dividend fits in a u64.
#[no_mangle]
pub fn div_u128_u64_if_div_le_u64_max(dividend: u128, divisor: u64) -> u64 {
    // SAFETY: callers must pass a nonzero divisor and a dividend no greater
    // than u64::MAX.
    unsafe {
        assert_unchecked(divisor != 0);
        assert_unchecked(dividend <= u128::from(u64::MAX));
    }
    let quotient = dividend / u128::from(divisor);
    quotient as u64
}

// Missed optimization.
// Same division, with the extra assumption that the upper 64 bits of the
// dividend are smaller than the divisor, so the quotient fits in a u64.
#[no_mangle]
pub fn div_u128_u64_if_quot_le_u64_max(dividend: u128, divisor: u64) -> u64 {
    let upper_half = (dividend >> 64) as u64;
    // SAFETY: callers must pass a nonzero divisor that is greater than the
    // upper 64 bits of the dividend.
    unsafe {
        assert_unchecked(divisor != 0);
        assert_unchecked(upper_half < divisor);
    }
    let quotient = dividend / u128::from(divisor);
    quotient as u64
}
```

</details>

### Produced assembly

`x86-64 clang (trunk)` `-O3` or `rust +nightly` `-C opt-level=3`
```assembly
; Optimized.
; Runtime dispatch on the dividend: rdi is copied to rax (dividend) and
; esi to ecx (divisor). If the upper 32 bits of rdi are zero, the 32-bit
; `div ecx` at .LBB0_1 is used; otherwise the 64-bit `div rcx`.
div_u64_u32_default:
 mov     rax, rdi
        mov     ecx, esi
        mov     rdx, rdi
 shr     rdx, 32
        je      .LBB0_1
        xor     edx, edx
        div     rcx
        ret
.LBB0_1:
        xor     edx, edx
        div     ecx
        ret

; Optimized.
; No runtime test of the dividend: edx is zeroed and the 32-bit `div esi`
; is issued directly.
div_u64_u32_if_div_le_u32_max:
        mov     rax, rdi
 xor     edx, edx
        div     esi
        ret

; Missed optimization: can use only 'DIV r/m64', should not try 'DIV r/m32'.
; The upper-32-bit test (shr/je) and the 32-bit fallback at .LBB2_1 are
; still emitted, giving the same instruction sequence as div_u64_u32_default.
div_u64_u32_if_div_gt_u32_max:
        mov     rax, rdi
 mov     ecx, esi
        mov     rdx, rdi
        shr     rdx, 32
        je      .LBB2_1
        xor     edx, edx
        div rcx
        ret
.LBB2_1:
        xor     edx, edx
        div ecx
        ret

; Missed optimization: can use only 'DIV r/m32', should not try 'DIV r/m64'.
; The upper-32-bit test (shr/je) and the 64-bit `div rcx` path are still
; emitted, giving the same instruction sequence as div_u64_u32_default.
div_u64_u32_if_quot_le_u32_max:
        mov rax, rdi
        mov     ecx, esi
        mov     rdx, rdi
 shr     rdx, 32
        je      .LBB3_1
        xor     edx, edx
 div     rcx
        ret
.LBB3_1:
        xor     edx, edx
 div     ecx
        ret

; Optimized.
; Zeroes ecx and calls the `__udivti3` library routine; no DIV instruction
; is emitted inline.
; NOTE(review): the push/pop pair around the call presumably keeps the
; stack aligned for the call - confirm.
div_u128_u64_default:
        push    rax
        xor ecx, ecx
        call    __udivti3@PLT
        pop     rcx
 ret

; Optimized.
; No `__udivti3` call. rdx is copied to rcx (divisor) and rdi to rax
; (dividend). rdi is ORed with rcx; if the upper 32 bits of the result are
; zero, the 32-bit `div ecx` at .LBB5_1 is used, otherwise the 64-bit
; `div rcx`.
div_u128_u64_if_div_le_u64_max:
        mov rcx, rdx
        mov     rax, rdi
        mov     rdx, rdi
 or      rdx, rcx
        shr     rdx, 32
        je      .LBB5_1
 xor     edx, edx
        div     rcx
        ret
.LBB5_1:
 xor     edx, edx
        div     ecx
        ret

; Missed optimization: can use only 'DIV r/m32' and 'DIV r/m64' without `__udivti3`.
; The emitted sequence is identical to div_u128_u64_default: ecx is zeroed
; and `__udivti3` is called.
div_u128_u64_if_quot_le_u64_max:
        push    rax
 xor     ecx, ecx
        call    __udivti3@PLT
        pop rcx
        ret
```

### Expected assembly

(well-optimized functions are skipped)
```assembly
; Expected: unconditional 64-bit `div rcx`, with no test of the upper
; 32 bits of the dividend and no 32-bit fallback path.
div_u64_u32_if_div_gt_u32_max:
 mov     rax, rdi
        mov     ecx, esi
        xor     edx, edx
        div     rcx
 ret

; Expected: unconditional 32-bit `div esi`, with no test of the upper
; 32 bits of the dividend and no 64-bit path.
div_u64_u32_if_quot_le_u32_max:
        mov     rax, rdi
 xor     edx, edx
        div     esi
 ret

; Expected: no `__udivti3` call; only 'DIV r/m32' or 'DIV r/m64' is used.
; NOTE(review): this listing never reads rsi, which presumably holds the
; upper 64 bits of the 128-bit dividend, and it zeroes edx before each div.
; If the upper half can be nonzero, the intended sequence looks like a single
; `div rcx` with rdx loaded from rsi - confirm with the reporter.
div_u128_u64_if_quot_le_u64_max:
        mov     rcx, rdx
 mov     rax, rdi
        mov     rdx, rdi
        or      rdx, rcx
        shr     rdx, 32
        je      .LBB5_1
        xor edx, edx
        div     rcx
        ret
.LBB5_1:
        xor edx, edx
        div     ecx
        ret
```

</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzkWltv27gS_jXMyyCBTNmy_OCH3IxTnPZscbZnsW8CLY4tNpSo8pLE_fUH1MUXWU5sJ912sUHgCymSM998M_oskhkjlgXilIxuyOjugjmbKT39nqFkBb-YK76a3jqtsbByBSQKHBePIKIhiJCSKIBU5aWQaMAqYFBqdFyliiNI8YAkvPZjxAK4eBQcCw4kvCXhHbiQkvCahNefrv8EMr6Buw9_gCZ0locUyPgOUBrc7YiGvoNEwRUJ7khw_SVjltCxgaVGZmGOKXN-TBRszeVNFAbsqhQpk3IFC2YsarAZK3YujYabmf-lnvARNaG3IAowKkdImUEDHz_-8QlS5SRv_QYuNKYeG6vAZgiqtCJnEh6ZFqyw4IwolmBKTMVCIAdmjMtLK1RhmtXq1_tnlpcSjUelarjsQ9tk1eJdPyvjQSwqCxrLNJRaedBN1bpZ1_tuq7Z1UJ5UQejYwkJYEIVVENLLubCgcSk8XFenm9RAf6pJ35yyAgsLT0LK1-ypX39nOfoYs7KUImVzibBQGpiUrbkkCnwsSRSYnW9OY77Vt_nGPFGjgIvHpG4EVaJm21E7sOoaoJCCGER-6JOwGaSZUhUT5mifEAsgdLzGahAROgY_eqsxpL6R0FgV8BxHhE6OXNmHpiZE40bdPKBx2_66RdFwbVFMoiBJ_BxWhFuzJonLFa_bCJ34Kw0iZNaWFYnpjNDZUtjMza9SlRM6k_KxfbsstfqKqSV0JoxxaAidReNosvGyfr02sGAamIEP4AqO2li_vKeF8WSaS8yZFanPvirzxHcE49IMGNjMeyeKileSFUvHlltUJDS-9a3ey_86Y70PbQ3xI6pc5woNFMq2xAVmVnmOVou0yh7jySsKY7VLK3bU2HKxWKCvmZsU82ZXIzw1C74hul2VuFsKCA3rf2iKAqgCbrev6ICs-FxJe6X0ktDZd0JnX8N08u_P4_R5Z9ooqP_T9TKiSKXj6IuysVwU9ioj4f26m-NCFAhOFHZA48RCkqw_7xrszYDfmgDwxht_bUiTCoTERcPEhTThuGBOWkJj3x0Nkw1GPvO2x3isKmaNb-oJAQA0Wqc9WeP2UkInhMabGwydwdbshE42U5GwmcnfSM50QCwS_1Vi9S1nz2e7IhawY3h9Z9y1_X8f_vMlpMmn6z93h78PDN6K7Ztt05okcyekFUXiCo0szXyR8ZVgf-hLcH4SxiBv87KqnkcAu7TvD-z9PxpVX2rej68vgBPee6j9jWvi2dw_0d8B475C0OKVOF__PLy7payukF1A1yi_XssaZw9AURfdNxazfR-2qlk03GLHGd68WM5a84_LvLOh-Jmp1wvuOvfeB90X8KmTzwu3o5Pvp8Dcg3ErTHYgD285Wiak2SiS8Na4PGd6VTl7m4UkvP9d5EIyDbiRSl7NeSdSKdIHrwzxuWQe5onnJJ1V4-qPW_OdIK_Cbzz69omPTa-80l5M1qwwCMby-oduJgpbf2LGoLaJK9IM0wfkm0R-KX-9JBzdFCrJWbGUSEYNeqWbw6I4oLHWFAuvwXlubHjhW-pSfemJ46rf3VvxdYVhi_on-J7B9cReyvqhtfwf3-3So0OqtfY1tSGT6lNITylhJ0DQp9LOBeONSLw-_OCDkQ1YPwLew-XsdKB3VNuvDvT93xHlfRX3S8Jc__drwgasvhvUr1E89pXdDsYD79suyLU1NcjVA8q_poJWlkxaPH4QCn3a8Gw8_toiGg1307vB64dA_LYEf1Er_ppo96f4Ji8PaNB3gv6AWCR0tqcX10Gon6Z91oq7tH4Gj_lcrjry7TmOLqMhpNVTQUJjq13x4FVjFACJgsvfqsef9aNWXWvMm0IsMytX7SW3ngaXEh9RkvAu3JjZWt1ZOrzZy9A-IdfuCUCuHqH65ejpcQuaiw2i_q_tx7TqR3Og
X_POeJPp7Y62tLbjvmL9fvXx5iZIBrudz6oei_VY_7bTz0WzaNrp0NgI5XbatZ8nzowHZz4O53212LXkIPBHm9gNRY-JPZXEp3nKimp7RRVytf-U3q_a7MEUyoLVq_1thMMub-m2o10-m2TN3wlco-dw7WWe0TN5dgzHTg1gFZxXAlgFuT-AXUnYF8GfWSnCo6N3XIkITwndm2tDVwx2Vy6dydr82DOphbW7eMqk9O-bHbVh8Pnjl87MquygcarN-9KtlxtpE9vnIzP_NW40sVh3dL0_gTujNXfe5_Yy2ubOe95XTs_5egd1b7f1SdhMOQud_dYD4e2KxdfZuXb6rdQ8DHS_ONvaziwxtYcEGI2fUMrLdhuXw8IVzaYq0wjmQZQlVk_xXpJUR97j3iqkTidlhzpnlPJee0_XH32GnECstSXd4nF21Wj-fkjx2ArXO9aPE2Z9oYTsJssFn4Z8Ek7YBU4H4zCYBEE8iS-yaRQMwgkNYj6I6WCBcTQacZzTcMJH82AexxdiSgM6HAyCaBAO40F8xVnA6TwIBpzjaM7HZBhgzoS8kvIxv1J6eVEdupgOBqPBKL6QbI7SVCfAKC3wCapeQikZ3V3oaXVeY-6WhgwDKYw1m2mssBKnByrg5vDN-iBKc5hGLAul0WyfhfIYVoeAfN3MUJZgVX1EBQGFzbDvcEznxExzaKU6LnMZDa8unJbT88-jNOg8Tun_AwAA__9PqDDH">