<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/104465>104465</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
__bf16 treated like _Float16 under #pragma STDC FENV_ACCESS ON (x86_64 clang 18.1.0)
</td>
</tr>
<tr>
<th>Labels</th>
<td>
clang
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
ngbronson-openai
</td>
</tr>
</table>
<pre>
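Under `#pragma STDC FENV_ACCESS ON` (or `-ffp-exception-behavior=strict`), __bf16 &lt;-&gt; float conversions are compiled using the half-precision (IEEE FP16) conversion instructions (`vcvtph2ps` / `vcvtps2ph`), i.e. as if the type were _Float16.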
My test code:
```
#pragma STDC FENV_ACCESS ON
float widenB(__bf16 x) { return x; }
__bf16 narrowB(float x) { return x; }
float widenF(_Float16 x) { return x; }
_Float16 narrowF(float x) { return x; }
#pragma STDC FENV_ACCESS OFF
float widenB2(__bf16 x) { return x; }
__bf16 narrowB2(float x) { return x; }
float widenF2(_Float16 x) { return x; }
_Float16 narrowF2(float x) { return x; }
```
Compiled on godbolt with `x86-64 clang 18.1.0` and `-march=sapphirerapids -std=gnu++2b -O3`:
```
_Z6widenBDF16b:
vmovw eax, xmm0
movzx eax, ax
vmovd xmm0, eax
vcvtph2ps xmm0, xmm0
ret
narrowB(float):
vxorps xmm1, xmm1, xmm1
vblendps xmm0, xmm1, xmm0, 1
vcvtps2ph xmm0, xmm0, 4
vmovw eax, xmm0
vmovw xmm0, eax
ret
widenF(_Float16):
vcvtsh2ss xmm0, xmm0, xmm0
ret
narrowF(float):
vcvtss2sh xmm0, xmm0, xmm0
ret
_Z7widenB2DF16b:
vmovw eax, xmm0
shl eax, 16
vmovd xmm0, eax
ret
narrowB2(float):
vcvtneps2bf16 xmm0, xmm0
vmovw eax, xmm0
vmovw xmm0, eax
ret
widenF2(_Float16):
vcvtsh2ss xmm0, xmm0, xmm0
ret
narrowF2(float):
vcvtss2sh xmm0, xmm0, xmm0
ret
```
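Note that the correct BF16 code sequences (`shl eax, 16` for widening, `vcvtneps2bf16` for narrowing) are emitted only for the versions compiled without FENV_ACCESS; the FENV_ACCESS versions of widenB/narrowB use `vcvtph2ps`/`vcvtps2ph` instead.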
With broadwell as the target, the same issue occurs (with FENV_ACCESS, __bf16 conversions are compiled as if they were _Float16):
```
_Z6widenBDF16b:
vpextrw eax, xmm0, 0
movzx eax, ax
vmovd xmm0, eax
vcvtph2ps xmm0, xmm0
ret
narrowB(float):
vxorps xmm1, xmm1, xmm1
vblendps xmm0, xmm1, xmm0, 1
vcvtps2ph xmm0, xmm0, 4
vmovd eax, xmm0
vpinsrw xmm0, xmm0, eax, 0
ret
widenF(_Float16):
vpextrw eax, xmm0, 0
movzx eax, ax
vmovd xmm0, eax
vcvtph2ps xmm0, xmm0
ret
narrowF(float):
vxorps xmm1, xmm1, xmm1
vblendps xmm0, xmm1, xmm0, 1
vcvtps2ph xmm0, xmm0, 4
vmovd eax, xmm0
vpinsrw xmm0, xmm0, eax, 0
ret
_Z7widenB2DF16b:
vpextrw eax, xmm0, 0
shl eax, 16
vmovd xmm0, eax
ret
narrowB2(float):
push rax
call __truncsfbf2@PLT
pop rax
ret
widenF2(_Float16):
vpextrw eax, xmm0, 0
movzx eax, ax
vmovd xmm0, eax
vcvtph2ps xmm0, xmm0
ret
narrowF2(float):
vcvtps2ph xmm0, xmm0, 4
vmovd eax, xmm0
vpinsrw xmm0, xmm0, eax, 0
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzUV02PqzYU_TXO5ooIDCHJgsVLMqzaeZXmtZXeJjJgwK2xkW0Spr--MiEz5GOYTDqt-qIoEeF-nns5JyZas0JQGqHZCs02E9KYUqpIFImSQkvhyJoKwiaJzJ6jX0VGFTx926whfnj8bftlvX54eoKvj4DwQipw8rx2aJvS2jApnISWZMekQv5GG8VSg_ASttsk90JA_tpB_gPkXBIDqRQ7qjSTQgNRFFJZ1YzTDBrNRAGmpFASnju1oimzZsCEUUxoluopcjfI_XL4_PkZDNU2YEaHv6PQ7d-HS-zXihQVudbM0O9Q3p5lVKwQXvTFt7YRNF-BoqZRAlrkrwDNNwefY4uCKCX31u0QZdzrIl9s88X2-t2ML2aHlPGHUo5hEcdvgYFvReMUDHxjaUMc8P1A3JrufD26z_VxCaWAQmaJ5AZQ6LaL0AkDSDkRBXiLqTe1jrBnprS3nYqotLQrT-q6ZIoqUrNMg6NNhvxNIRqEVwivcALOVx_Ocp4Vsv0eHuDexF6YIL-3gl0ld3sAoKRFeA1tVfUO0L8qufurfTUg7elt658BHBzxujM7NUh3pi5xrfvro-Egk6JmWPnZtiO8fC33GLSVykZsq8rrg71-940lnIrsJe0wr_eSH6_Bu1KuxnV5rVy8huCy_VH4jgaX-Jx1ffGkXu073RldYn0VzBtAjUdBtcE11m-1ftncWY7t93n_SJ8t2btg6ZJ3Fv0dL7x1ya6vDn6vTUFrjTs2ubqQnz1V_G-P9d2G753rNTZ7lIaCKYmBVeyFA_3sBLfRHc_xZ8il6vT2RY8tscnGDIXhRHJ_t8SXKEmyPeUciAYChqiCmi6OJhUFpnVD7Z-EjiUHkWwnvUK8-R-AaGC5jfXc_T4cyceZ84huTVuj9ie7gtdwRPTz6PPmx_Bz-PNo-UEavYM_s9EnrWZCq_1FnN5hHInbOHV8gP3r7jneoH9jY7zO2D_A_P7R4N5UktuGdbecjD5QoyRbNwd-VS-xUsK7KrZboxqR6jzJMQrcX376duYpazj1vE9D_iseum-N31Wo_wNpHIl_kkV-tvSXZEIjb479ub_ArjcpoznxA0Ko79L5LCB5lnpu6C0Db4H9RZDM8gmLsIsDd-HNvNBdzhZTN_doltCUJmngZr6HApdWhPEp57tqKlUx6RQt8twgCGcTThLKdXeExrg7GCCM7WlaRdbBSZpCo8DlTBv9GsIww2nUq59RlBiaAWd_vkocNN2Be_S0alW1XYTb8yMJXk4axaPSmFrb0eEY4bhgpmySaSorhGNbSP_l1Er-Qe0BPe4a0wjHfW-7CP8dAAD__0aqf1o">