<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/102611>102611</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[x86][ISEL] Missing support for half precision non-temporal loads
</td>
</tr>
<tr>
<th>Labels</th>
<td>
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
njroussel
</td>
</tr>
</table>
<pre>
The following IR will crash `llc` on all releases between (and including) 16 and 18:
```
define void @drjit_641b8f0d1521b37df5f00905a3202c37(i64 %start, i64 %end, i32 %thread_id, ptr noalias %params) #0 {
entry:
br label %body
body:
%index = phi i64 [ %index_next, %suffix ], [ %start, %entry ]
%h1_p1 = getelementptr inbounds ptr, ptr %params, i32 3
%h1_p3 = load ptr, ptr %h1_p1, align 8, !alias.scope !2
%h1_p5 = getelementptr inbounds half, ptr %h1_p3, i64 %index
%h1 = load <16 x half>, ptr %h1_p5, align 32, !alias.scope !2, !nontemporal !3
%h2_p1 = getelementptr inbounds ptr, ptr %params, i32 4
%h2_p3 = load ptr, ptr %h2_p1, align 8, !alias.scope !2
%h2_p5 = getelementptr inbounds half, ptr %h2_p3, i64 %index
%h2 = load <16 x half>, ptr %h2_p5, align 32, !alias.scope !2, !nontemporal !3
%p3_p1 = getelementptr inbounds ptr, ptr %params, i32 5
%p3_p3 = load ptr, ptr %p3_p1, align 8, !alias.scope !2
%p3_p5 = getelementptr inbounds i8, ptr %p3_p3, i64 %index
%p3 = fcmp oeq <16 x half> %h1, %h2
%p3_e = zext <16 x i1> %p3 to <16 x i8>
store <16 x i8> %p3_e, ptr %p3_p5, align 16, !noalias !2, !nontemporal !3
br label %suffix
suffix:
%index_next = add i64 %index, 16
%cond = icmp uge i64 %index_next, %end
br i1 %cond, label %done, label %body, !llvm.loop !4
done:
ret void
}
!0 = !{!0}
!1 = !{!1, !0}
!2 = !{!1}
!3 = !{i32 1}
!4 = !{!"llvm.loop.unroll.disable", !"llvm.loop.vectorize.enable", i1 0}
attributes #0 = { norecurse nounwind "frame-pointer"="none" "no-builtins" "no-stack-arg-probe" "target-cpu"="skylake-avx512" "target-features"="-vzeroupper,+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,+xsaves,-avx512fp16,-usermsr,-sm4,+sse4.1,-avx512ifma,+xsave,+sse4.2,-avx512pf,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,+xsavec,-avx10.1-512,-avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,+evex512,-avxvnni,+rtm,+adx,+avx2,-hreset,-movdiri,-serialize,-sha512,-vpclmulqdq,+avx512vl,-uintr,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-gfni,-avxvnniint16,-amx-fp16,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,-avx512bf16,-avx512vnni,+cx8,+avx512bw,+sse3,-pku,+fsgsbase,-clzero,-mwaitx,-lwp,+lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,-avx512bitalg,-rdpru,+clwb,+mmx,+sse2,+rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,+fxsr,+avx512dq,-sse4a" }
```
Here is the stack dump:
```
Stack dump:
0. Program arguments: ./bin/llc /home/user/kernel.ll
1. Running pass 'Function Pass Manager' on module '/home/user/kernel.ll'.
2. Running pass 'X86 DAG->DAG Instruction Selection' on function '@drjit_641b8f0d1521b37df5f00905a3202c37'
#0 0x000055558f8c270f llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./bin/llc+0x33ca70f)
#1 0x000055558f8bff14 SignalHandler(int) Signals.cpp:0:0
#2 0x00007fb10a8a1420 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x14420)
#3 0x000055558ecbfa68 llvm::EVT::getExtendedSizeInBits() const (./bin/llc+0x27c7a68)
#4 0x000055558df43670 (anonymous namespace)::X86DAGToDAGISel::useNonTemporalLoad(llvm::LoadSDNode*) const (.isra.0) X86ISelDAGToDAG.cpp:0:0
#5 0x000055558df46e5f (anonymous namespace)::X86DAGToDAGISel::IsProfitableToFold(llvm::SDValue, llvm::SDNode*, llvm::SDNode*) const X86ISelDAGToDAG.cpp:0:0
#6 0x000055558f6a0218 llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (./bin/llc+0x31a8218)
#7 0x000055558df5a7ed (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*) X86ISelDAGToDAG.cpp:0:0
#8 0x000055558f695357 llvm::SelectionDAGISel::DoInstructionSelection() (./bin/llc+0x319d357)
#9 0x000055558f6a3039 llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./bin/llc+0x31ab039)
#10 0x000055558f6a5e8c llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./bin/llc+0x31ade8c)
#11 0x000055558f6a83c6 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (.part.0) SelectionDAGISel.cpp:0:0
#12 0x000055558df64475 (anonymous namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&) X86ISelDAGToDAG.cpp:0:0
#13 0x000055558e9edb10 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (.part.0) MachineFunctionPass.cpp:0:0
#14 0x000055558ef1f91a llvm::FPPassManager::runOnFunction(llvm::Function&) (./bin/llc+0x2a2791a)
#15 0x000055558ef1faa9 llvm::FPPassManager::runOnModule(llvm::Module&) (./bin/llc+0x2a27aa9)
#16 0x000055558ef20f70 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./bin/llc+0x2a28f70)
#17 0x000055558cc1d44b compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#18 0x000055558cb32abe main (./bin/llc+0x63aabe)
#19 0x00007fb10a289083 __libc_start_main /build/glibc-LcI20x/glibc-2.31/csu/../csu/libc-start.c:342:3
#20 0x000055558cc14d3e _start (./bin/llc+0x71cd3e)
[1] 506883 segmentation fault ./bin/llc ~/kernel.ll
```
The reason is fairly obvious: half-precision non-temporal loads hit an `llvm_unreachable` call ([source code](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp#L529-L550)).
The release candidates for LLVM 19 do not crash on the IR I posted above. This is due to [this commit](https://github.com/llvm/llvm-project/commit/7ff3f9760da7d7c8fe9209280aefb05168efcf20) which, to my understanding, triggers another code path. To my knowledge this issue is therefore still present in the `main` branch.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJysWV1z4yiz_jXKDWWXhL4vcpHE43lTNfOeqU1qa-5SCFoWGwQaQLYzF-e3nwLJsuRkMh97tnazEtDdTz_dNI1MjOE7CXAdpLdBurkivW2Uvpb_aNUbA-KqUuzl-rEBVCsh1IHLHbr_Cx24EIhqYhoUZKEQNMhCpCQiQiANAogBgyqwBwCJAlwQyRCXVPSMy12ASxRlyI1FRRDfBOEmCE9_s3D8178yqLkEtFecoSAJmf6H26csiaqiDlmU4qiKc1andRiWYUpiHGIa5wEueJagAKfGEm0DfIfGd5DMv8XYvdlGA2FP3I91ViOpiODEuLmOaNIahzTAcYiC_HYABNLqlwkzqjQSpALhRBxTc1f8-7QSIbeGSwZHFMQb1DV8QJXeThNPEo4eroPe1zU_oiDd-IFh1eSP98XqFz8_N9BET13kDezAgoAWpHWucVmpXjLj_Dy5O3Nz4CR-pSr2qoQi7ELQ23HvRPCdRMUAKvL8rQ1VHbhXPCocBdL3gDVE1BcG4lnoPEEX-M7ggvguytBxUBJ_uNCTnoHG-EdIh2GppIW2U5q4mEbxzAH8L5hNlsjxe8zi32H2rPC3yMU_JRf_Ern4_4PcwWIX_wt-01eqfsivt_Ob_DqZd_nlxYWF99kd0dW07ZCCb5cMD3k77vNmtom6-Am85Hc42kmKR6NMFyOrzsOFC9Zk1lilYTl5UnmBfRbRKJtCdyqNvxDNeVUc6ti8Lo4jb1VGXwC9g4SxJX34zoGZS1AlmV_LHY39DhYC81rqyv4cHI9O8m7BBJUpCYsBX8EHZ4XYt2uhVOdekrk7XmrujAbrD6xxUb5ZnG84Cj3oAEfuUMFReF6Bo2g5N-bAcg2-XDObi2dzbl8sJpOlYIDx5NW6l1oJsWbckEpAgE9BXizaA7VK8--wBjlbxiMUXrhJrNW86i2Y8fx0hvNbJJUG2msDSKpeHriLIMa1Ji2sOsWlBe2UxpsAY-nD4U5qLNWq6rmwXJrziLGEPq-I3q06rarTUkv0DuyKdv2kyDy_CPIMK7I_pi575-tqILbXYKbFq_130KrvOofkLsC3na5pcwjw3YoKBq2yMIyT_XF8cNLuwZCmHmWoaHvhZI6qG4aopr48Bvj2aMjey4yI6s5vs1VvQLfGmV2ZNhl1GkjW0Xktr1sy0zJbhM-LOgdjZc1RMKuf3XNnD5p75KsDZ_AsBiPxIM_lvqO-DQrwbZZU3M5M0FFvFK6jlefvZGbfqY5Ky76NHrZqP5_EPpwGqJ0ISyNMRzOt2lcwLt9Lybm0xVnak78i7XF1Gh8gn3DgNBvUwB6OZ1BO0TCubTtaZcfJvF_WaDDgIK1atWdcc08FaE4E_-4RmYaMKvdDIL-dXBxd81B6Lu2YIlTUojeN6rxaTVwi-4RpO0qPhI0uVy0_eWW5OMfODe5qyZdsDDnhFo_5cQrIYMb5yLRkc2D1SaKqR2k_vHw7c0SPxVy6OkyIYp8zz_0wUJudqciAkwq3PTx7B8Kt43YlDmOOi-90cNw05ExwllQ-7yqpuNw7wCuQ32jrnzoNNVjaHOwpySVQJffg-9yVrVq_iipZ893JParaTsDxBHfA6x06EVW1fNxs034ZknU0ct5GdZTRGVXcEuHtaNbp_hTeQzUmbTsZBXwKggFgM3pH05NnfNTm99eqbkmyXDzwZazfp_uhmKwct92zB2J2o836aPQ8Xj4pV27vE1_Tpgq8vEINf_8DGgKcG8QNsg0g1rfd-eq1lHhwhXW5Ilwj_88XrXaatIjoXe-aIBPEN2gd4G3FZYC3QlAU4G2jWgjw1tWzAG-fQUsQayEGVdGo6q9eSneb7Ihxh0S-7SW1XEn0xQ18JpLsnHjurpWtYr1wfVn-jnacrwcL-AcWvhYZ2tx8XAXxh83NR3QvjdX9YPMBBPin0WB9AuNM
_vq9M59atThE4TEMwzBN07SoC4rzsEbuKHWcxjfmxQwPXzSX1jP-qAmFABfnRZocnpSxGkgbYN-J-cLi7qTFgvQA34bHOKYkD-sAl2cU0RJFVddRgh74ThLxHyKZcBQWo9Jh2Kxp5-Ie-v8mRXhUlNdVFJKCRAkO0dOTBt9WPmnrMDkwvArw9lhkT1myElz2x9VO9sNEN1y410atQ484ShIcLvDGc7xAq5pkxYy1D38_Dg87sB-OFiQD9sC_w7285dZ4BCWiShr7NkU4pznJioXJZG6S1Umc5eHwyULJl1b1BknSgul8cMrB_Nci29x8fFSbm4_3DyCGwd7Af5V8HFviT4qwRTDdwMPmv4pBgG-WOLnRxFFSoq9F5hSelL8di_QCcQZp_UeI780XrWpuXS_3qLZKLBE_bP4moh9a4tng5MIPhk-e_ZIv2SJBMxLiaB7waV8ucA-jd4rBnWpbt2mLHwDspf-8xRBtiB5wXc68u6UiUuBomS_5kv2U5MD-iP3Bix9h_8VcKJb8lWmc5j_lb6NmtW9W-op3iChZnOYLIsqL0MVhXP7UtAvaR5A3kn1oud3cfHzfKqnCuJysuoIWXlhNoaC_mDA3QtwSw-mtUPTZLIifDp8xRbL3MDEo6AJTdIGpiGn2U0y6l_8jPxPacAkn8wtMr-YmUB3RdqgXl6pfJ4nDh5cpmyVJnv5Ryv4h5p9msgO5LP0lsCoK0Q81uyZhhulNMO8y94a-t3Etzgeoo7qMyAzX9osTPbUrv4_o9RlFcF5GZJFg6SUGQspfwPDZt03LAI1D79snZLnpsqV9HNb5PDYCdoS-jP3MGcl9253z5vdRFHUeLlAsKi-lEUuSCrm7ABcwueoKvS-gFwfUp09_f75T0vqvQt6uEPTtiC9qKq1iTCpALeHybahZTEgFC6TlolvCRRkWMXp6EryiT_4b_tOoblv13J26252bW32i9zg8Tq94HUcB3lLjGqj1enr0k17PmgbxTZxg93cyj8MLohIWAxoMv-1CHlEWz1xIb6Mg3bgeOg2zooiRgZ1r94mvkTXphUUXbf__vu7037yIPDaANBCjpLuK1IRr8YJUteeq919zGyJqd3ei3DhbUsnV9JVRKMIMIhrQTrnG3irUcIuIHH6E2rdPvaQaCG3816ksRJQI4TvT9NaoXlNA1B2w6SbARWNt52sI3jrGuW36ak1V6x3an_636rT6x5_T20oo19y60M3W-Ib30X9NCvD2a5ENf18VPBx_SnG5-pSmPqtxuUavefG_nCFKJOOMWDCoVhq51EVROfzoJpUdf3hT0t_k7v9C96hTxgJDpFJ7WKPHhvuLHuvBfxBOb60boaptuf1D50dhvM3rOq7LPAsZyVlOixpKHJa4CAnUVZhGWQE1rX1fjw4Np42LqlWofUG9ZKCNdd5Jd7cdPLKa73agXRiVbUD7EKGO2GaNHr3cs1QHAWwHyA6emR7Gi6yGWmlAxjpNnQYD0iI-MBNkoY9VFqJKE0mb9ZzwK3YdszIuyRVcR7nbNUUWJ1fNdQhJWhSMkbAqa6hLnLOIQuTamyRNobzi1zjESViEpRtMk3Wcx2VRxUWcx0UVp1WQhNAS7nbCvl0rvbvykK-jEGdRdOW_L5vTL6762jNd9TsTJKHgxpqznOVW-N9mj0XmApfe3j98-OT25mdujNsDpu86pa1PFLd30Lh3zNub56rX4vq3g-_hmwBvRw_21_j_AgAA__9aWdbe">