<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/61271>61271</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
fastmath fminnum.f16 could be better on emulated x86
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
gbaraldi
</td>
</tr>
</table>
<pre>
This was found in https://github.com/JuliaLang/julia/issues/48848, where LLVM started folding a function into `fast fminnum.f16`, which led to a regression.
Original IR
```llvm
; ModuleID = 'min_fast'
source_filename = "min_fast"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @ fastmath.jl:244 within `min_fast`
define half @julia_min_fast_681(half %0, half %1) #0 {
top:
; ┌ @ essentials.jl:575 within `ifelse`
%2 = fpext half %0 to float
%3 = fpext half %1 to float
%4 = fcmp fast olt float %2, %3
%5 = select i1 %4, half %0, half %1
; └
ret half %5
}
attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
```
New IR
```llvm
; ModuleID = 'min_fast'
source_filename = "min_fast"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @ fastmath.jl:252 within `min_fast`
define half @julia_min_fast_678(half %0, half %1) #0 {
top:
; ┌ @ essentials.jl:586 within `ifelse`
%2 = call fast half @llvm.minnum.f16(half %0, half %1)
; └
ret half %2
}
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare half @llvm.minnum.f16(half, half) #2
attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" }
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
```
Old assembly
```nasm
.text
.file "min_fast"
.globl julia_min_fast_681 # -- Begin function julia_min_fast_681
.p2align 4, 0x90
.type julia_min_fast_681,@function
julia_min_fast_681: # @julia_min_fast_681
.cfi_startproc
# %bb.0: # %top
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
vpextrw $0, %xmm1, %eax
vpextrw $0, %xmm0, %ecx
movzwl %cx, %ecx
vmovd %ecx, %xmm0
vcvtph2ps %xmm0, %xmm0
movzwl %ax, %eax
vmovd %eax, %xmm1
vcvtph2ps %xmm1, %xmm1
vucomiss %xmm1, %xmm0
cmovbl %ecx, %eax
vpinsrw $0, %eax, %xmm0, %xmm0
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size julia_min_fast_681, .Lfunc_end0-julia_min_fast_681
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
```
New assembly
```nasm
.text
.file "min_fast"
.globl julia_min_fast_678 # -- Begin function julia_min_fast_678
.p2align 4, 0x90
.type julia_min_fast_678,@function
julia_min_fast_678: # @julia_min_fast_678
.cfi_startproc
# %bb.0: # %top
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
vpextrw $0, %xmm0, %eax
vpextrw $0, %xmm1, %ecx
movzwl %cx, %ecx
vmovd %ecx, %xmm0
vcvtph2ps %xmm0, %xmm0
movzwl %ax, %eax
vmovd %eax, %xmm1
vcvtph2ps %xmm1, %xmm1
vcmpltss %xmm0, %xmm1, %xmm2
vblendvps %xmm2, %xmm1, %xmm0, %xmm0
vcvtps2ph $4, %xmm0, %xmm0
vmovd %xmm0, %eax
vpinsrw $0, %eax, %xmm0, %xmm0
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size julia_min_fast_678, .Lfunc_end0-julia_min_fast_678
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
```
This is with `llc-15 -mcpu alderlake`
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzsWFuP47YV_jX0y4EMirpYfvDDzDhTbDFJgV7SR4OSjmTuUqRCUrYnv76gZNmyo5nZJmjQNmsMNKL4nSt5Pl64taJWiBuSPJJku-Cd22uzqXNuuCzFItfl6-bve2HhyC1UulMlCAV751pLogfCngl7roXbd_my0A1hz3_upOAvXNWEPX_274Q9C2s7tIQ9x1kWZ0v45x4NwsvLj9-Dddw4LKHSshSqBg5VpwontAKhnAaS0opbB1UjlOqaZRWmJKXAVQluzx1ILMFp4GCwNmit0GoJhG4JfRie2ohaKC7h01_Pn1M6_El5aKZIEj3C97rsJH7aAom2QNiqEWrn7RO2GkBWd6bAXSUkKt7gGceuODbgHDc1Oii545K_6s6NSAwaEj1g0LIVJdFDxPqHb4a3Tf-Wxv0jEONLlXmhkGWByvxLOsqkcfC3kGX39p0Rrbx4ecrSXRoHnfqi9FEFUqjuFNSqu0hdEwEkpuAjarjbLz9LEj2wOIajcHuh_KhcIk7pIFViJRTCnsvKC_djvxtRuzQLCcuGTpZQwp5gbISErYGwiAJZPZ69162fXRdvyHeMrGOSPfVuobWonODSDo4lq2TimKhQWry4BeBtsD4FVYsnd7FL_cSppOZuioxmkOEsMh6QRdP2iQItHQyo3qIP0eubiiS9iEWJhQMR9lqmqbjLy33863hUZvDqXXLGrbbTQeTOGZF3Du05t34KrB79NKgMbzBotVAOjR_7aEsY41ISxnx_a3SOgXW8-HLpFUoKhQG3TQ-6tUVY6Itp2fTVs6wkr-15zoV-TFlIh2yE4S8l6QQpIgbnxIWEse2Rmwp-ROPL2rvCnsBD4qsWFobvyGPe1fBJVXpGSXRVMjLC0FR4fJcsvtHE-zSRsN9AE6vsP0cTWfp1NFFwKYeSHr0cZvd1BXrHxa8sWjZbtF7seVz_HpwzfpEFpSuDCErbV1WA0p06ClWCQV4qrRBsi0UnueO5RDgKKQ26zqgx34XkBj8IZYzinGP2-zPJrRV2sfIrg4cpRfwfkdPw_IssgVuLTS5f72DKJ_YMXS8dnty15emI0PUcEfnuWupcErr-5dINNz_CIggCeMRaqOt2bWbBv2huGZeiVoSu--WOntZ04uNri7NWCXsiMR0NDAIzqOgB5n7ey_l9yMVyUYldvwFtjS7GMYt8eeb5kr6l-N4GSzwPjUrbzu5_6pOcmLy9tVVitSsqvtNVZdFBmN52nz-fJdkTBBNEow-jWtue9xZvGjBYC-vQ3IMOfltjjr2e-Dztk1PThOdX5KcPseMrFqepcz8f5eBecZoBHBp9KId-vAB6ZRdEcXDtnrV2QE0M3cCmlvhpzu2JJX6ahvi2pXAW1hW6EXYWdfWnaPQhl_eR3eZRKHufxxvXZgNtdfvBNILrXMguAIPup6GxfPGVs0NV0usiSddLK35-s9xgIhR8VDmoymvdfFQpd9zxnSrhtrB71_D8wVPUUmmHyz_98I_r-sGe-icbiKE1us6Fs-_w5A94_B15cpX9Gp5cZb-NJ_2O6WOeXGX_Jk9O3frGk_fc9xU8Gf5ReLJoWunsrD8TgWv9HHKJqjxM9LJZidm4eqcsa_dDvuOP4Nco3xy-_wl67qv8XXq-L9j_anrurxSF7c9j0B-viyBMIGiKtgMuSzSSf8E7Nl-Um6hcR2u-wE2YrrKIplmcLPabLCwSVhR5nJdZQiPkNI8rXmbRmq4jxHwhNoyyiEY0Y5TROFlGNGQx8ohWPFqXUURiig0XctkfFbSpF_2d5SYN2SpcSJ6jtP0dKWMKj9B3-kiT7cJsvEyQd7X1xythnb1qccJJ3IwH5OktJhS6kyXkCDk6T0FaATb-JIMlnLJ00Rm5eeeetb-QGP4FrdGfsXDTm9be8X8FAAD___xgFLs">