<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/123631>123631</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AMDGPU][GISel] BFI generated instead of a smaller load
</td>
</tr>
<tr>
<th>Labels</th>
<td>
backend:AMDGPU,
llvm:globalisel
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
qcolombet
</td>
</tr>
</table>
<pre>
When using GISel I end up with `bfi` instructions where SDISel uses a simpler sequence of instructions.
I've attached a small reproducer and a slightly bigger one, because I don't know whether the fix will be exactly the same for both cases: the smaller reproducer has all the relevant instructions in the same basic block, whereas the bigger one does not.
In both cases, the problem stems from the fact that SDISel is able to simplify `extract_subvector (load <8 x half> %addr), high_4_half` into `load <4 x half> %addr + 8`, whereas GISel lowers this sequence all the way to ISel without any simplification.
This combine may be worth putting in the generic combiner helper.
# To Reproduce #
Download the attached reproducer or copy/paste the IR at the end.
[repro.ll.txt](https://github.com/user-attachments/files/18479391/repro.ll.txt)
And run:
```bash
llc -O3 -march=amdgcn -mcpu=gfx942 -mtriple amdgcn-amd-amdhsa -global-isel=<0|1> repro.ll -o -
```
# Result #
GISel produces `bfi` instructions whereas SDISel doesn't. (Showing the result only for the smaller reproducer.)
With GISel:
```asm
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
ds_read2_b64 v[2:5], v0 offset1:1
s_mov_b32 s0, 0xffff
s_waitcnt lgkmcnt(0)
v_bfi_b32 v2, s0, v4, v4 <--- these
v_bfi_b32 v3, s0, v5, v5 <--- these
ds_write_b64 v1, v[2:3]
s_waitcnt lgkmcnt(0)
s_setpc_b64 s[30:31]
```
With SDISel:
```asm
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
ds_read2_b64 v[2:5], v0 offset1:1
s_waitcnt lgkmcnt(0)
ds_write_b64 v1, v[4:5]
s_waitcnt lgkmcnt(0)
s_setpc_b64 s[30:31]
```
# Note #
Small reproducer:
```llvm
define void @bla(ptr addrspace(3) %in, ptr addrspace(3) %out) {
%val = load <8 x half>, ptr addrspace(3) %in, align 8
%res = shufflevector <8 x half> %val, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
store <4 x half> %res, ptr addrspace(3) %out, align 8
ret void
}
```
Bigger reproducer (automatically reduced):
```llvm
define amdgpu_kernel void @foo() {
bb:
%i395.pre = load <8 x half>, ptr addrspace(3) null, align 8
br label %bb374
bb374: ; preds = %bb374, %bb
%i375 = phi [1 x [2 x [1 x [4 x [1 x <4 x float>]]]]] [ zeroinitializer, %bb ], [ %i845, %bb374 ]
%i377 = phi <1 x float> [ zeroinitializer, %bb ], [ %i509, %bb374 ]
%i414 = shufflevector <8 x half> %i395.pre, <8 x half> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%i415 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %i414, <4 x half> zeroinitializer, <16 x float> zeroinitializer, i32 0, i32 0, i32 0)
%i446 = shufflevector <16 x float> %i415, <16 x float> zeroinitializer, <1 x i32> <i32 6>
%i509 = fmul <1 x float> %i446, %i377
%i511 = extractelement <1 x float> %i377, i64 0
%i665 = insertelement <4 x float> zeroinitializer, float %i511, i64 0
%i670 = extractvalue [1 x [2 x [1 x [4 x [1 x <4 x float>]]]]] %i375, 0, 1, 0, 0, 0
%i796 = shufflevector <4 x float> %i670, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%i844 = insertvalue [1 x [4 x [1 x <4 x float>]]] zeroinitializer, <4 x float> %i665, 0, 3, 0
%i845 = insertvalue [1 x [2 x [1 x [4 x [1 x <4 x float>]]]]] zeroinitializer, [1 x [4 x [1 x <4 x float>]]] %i844, 0, 1
br label %bb374
}
; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none)
declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half>, <4 x half>, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #0
attributes #0 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
```
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzMWEuT4jgS_jWqSwaEkWxsDhyg2Jqowz6iezbmSMh2GmtbljySzGN-_YYkXLiqqJru2T5sBAECKR_f92VKMtxacVCIa5JtSbZ74INrtVn_XmmpuxLdQ6nry_q3FhUMVqgD_PL8FSU8A6oahh5OwrVAlknZCLJMQCjrzFA5oZWFU4sG4esuWAwWLVjR9RINWPx9QFUh6OaVyZwkm2dC8yMCd45XLdbAwXZcSjDYG10PFRrgKvwsxaF18gKlOBzQgFYIJVZ8sAjPUGtFaO7gm9InEA24FqERZzgJKaFEwDOvvLH_3fIOodEGSu1aqHhM1ecXZn14tG6aQcstxKQkHrlyr4ELdXNbcisqKKWuvkVCuA2Tk6RrjRaUdh69J0BN8iD0MSzvjS4ldmAddhYao7uIiFcOXMvdSLOwwEuJ4HQkWzQXLw-eneGV29uhPGLltAFCC6l5DYQ9FnCGlsuGsL8BoRmva0PoykduxaHdp_swGdR12nsbDdN3hkDoFgqyTLz1CDeWjNQnNB67sDf9PYcex4lffMphoS8pPTjg6jJiEBX3zF4J-tW7qHRXCoXQ8YuX86SNa6EfnPNFehXggAqNqMa1BlqUPZqrG0IZ_Krhy6gqEMrizE6fVMDonbwU4kR-baDS_YXQp55bF8vk-QtwF0ao6hAi2waTuZRzd3Yk2xFatM71lrANoU-EPh2Ea4dyXumO0KfBopnFaB0qZwl9aoT0JfC0KNJ8xVYLQp9euaQrkmw2qgYzKO804lom8VVy25JkI2UFs38ymHXcVC1hO97Vh0rBrKv6gbDdoTmvUgow65wRvUSI8zPe1bO2qyyH2UHqksuZsCgJ2xH2mJD8ceFlH9OBmYbZNPiN4y9oB-lu7MZyuHJpP908uB3r2vdIaOi5r9yvrT55nT3bJrrXSl5CD3_QsvNIFkk2v_ktKyRxpWxMmdsurFjZ_YkLVykHx65SjtAiIXQFeO4n3-Th22QyGtZ2b5DXdF8uUziSbEsJ22RB-Uc4JqCbxqJbELZZjJE6fdyXjIINLZOcm6Zp3mZxL9RxXzYiWB6pt4z2xzS---6czWaeDIvvDNjEIIvvdwxquz8Z4TCCWYR1V0jMQ_qOJO3eouur4MGSbMsSb7y4Wr8ulSBLVPv_QZfPYH3ATDo6_cnM-Cb6h3aTDerrmxPxDWFSHj1jNTZ-hzxqUQNJk1JyQoveGfA7vO15hYQWzHNGaCaUh_HRrB5cGOVbkmzA_3LkEgjbwZ1D5DNHMQyX4qCgGH0Zvw2wHdh2aBqJ4wn17mQ6cumt30z0WlitrhP-SBKMBgP2KBiF0BF-kI2D5TjIfbI-B-u0wTsnmokH8KesvEZj0AXCvRz5HS238difnCWEFnxwuuNOVFzKCxj0E7Uvlc9k9bt0P-y_oVEoX0RutCa0uGlVltFJ4FmwVTbvA9IfE04NUr5FWhqQvETpHZcly9MIMA7ZBgjbQm-wjtK-LPIy-fEtpzwLK_pWAMm2Czj7Dxo_rt_SybcoUiM1dz7lbDd9-VXwBxotlHCCS_EHmpeQcG14v8ZHLtLsZY7lKcTmG7PKb1mxx8Uk5A_FyJLVhzHSRfpdZT_Kdqf276XxV5sgphTFcFxI8OUYwC9foU9DIc7jLWHeNR2fN4zOGT0zei6axdIX4LtW8mgn6X0O4HXEO2t84sm9weoGJV3eZ_cNnIj6ewNfa-E1u8sphVmyCnGbbpDvSycmdi0JX2U3u8Ui2F2v6ijR3wLvevBmHvIyheTFfrmM0gll0UzN0z-BFObGFO74zZNpXkcuB_wZjRpbP1x6_NviZXR9G-Pnqw90TN_SssyTSYX9iYrFOxVfymgxDug4YOPge_uoSNOJGG85-w6WPuns15iXNwrZa-KKNPskh7-q2728fgjYlZ6J8B-eJvEATTb-KHkaVHgygI1z
xj9BQaXVEc3BV7nSfq8qefUNlG4MIihtL6oCpQd1EqoOj_0G3WAUdNhpcyG0UFph3DFqrCSPN4D_ebN7v8nd21vGqhFdx83h82_-rsGu9wfunBHl4PyNibLYmSTf_jQy4M2t5aFes3rFVvwB14uc5axY5qvsoV0XeZHTNM2KJMWiyVYlXVZJvSqTps6Lmq8exJomNEsWNFlkdMmyeUWXZcazlFUVxzTnJE2w40LOA8PaHB6EtQOuF5Qt2eIhlIMN_0tR6tGgqgnbbP6---Vf_ybU9yahNNyJ2CY-oIbnU0pJtnswaz8zK4eD9RoK6-wtjhNOhn-8rs58YW7jE2G2g-3Tc_zngDusw0Mp8hp0M_4ThSbcnR4GI9efPM6HzOLHrDf6P1g5Qp8CxPBIH1Ee1_S_AQAA___BDL6S">