<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/100689>100689</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AArch64] Using tbl to optimize indirect memory access
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
vfdff
</td>
</tr>
</table>
<pre>
* tbl.cpp: https://gcc.godbolt.org/z/3W5e17o9f
```
#include <bits/stdc++.h>
#include <arm_sve.h>
using namespace std;
#define N 100
// Keep src small enough to fit in a single table (Z) register; this assumes -msve-vector-bits=512
#define Length 16
// Copies dest[j] = src[index[j]] for every j in [0, n).
//
// With TABLE defined, src is loaded once into a vector register and each
// chunk of indices is resolved with svtbl instead of a per-element load;
// otherwise a plain scalar loop is left for the vectorizer.
// NOTE(review): the TABLE path needs a vector of at least Length 32-bit
// lanes (e.g. -msve-vector-bits=512 for Length 16), and the two paths only
// agree when every index[j] is below Length -- confirm against callers.
void foo (float dest[N], unsigned int index[N], float src[Length], int n)
{
#ifdef TABLE
  // Load exactly Length table entries. An all-true predicate would read past
  // the end of src whenever the vector holds more than Length floats.
  const svbool_t table_pred = svwhilelt_b32_s32(0, Length);
  const svfloat32_t srcv = svld1_f32(table_pred, src);
  for (int j = 0; j < n; j += Length)
  {
    // Active lanes cover the elements from j up to (but excluding) n.
    const svbool_t predict = svwhilelt_b32_s32(j, n);
    // 32-bit indices, each expected to select one of the Length table lanes.
    const svuint32_t indexv = svld1_u32(predict, index + j);
    const svfloat32_t tmp = svtbl_f32(srcv, indexv);
    svst1_f32(predict, dest + j, tmp);
  }
#else
  // assume_safety asks clang to vectorize without proving that the stores to
  // dest cannot alias the loads from src or index.
#pragma clang loop vectorize(assume_safety)
  for (int j = 0; j < n; j++) {
    dest[j] = src[index[j]];
  }
#endif
}
```
* The two code paths above are equivalent if we assume **-msve-vector-bits=512**, which guarantees that src is small enough to be held in a single Z register, so using tbl can save a load in the loop body
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJyMVUGPqzYQ_jXOZbQRmADhwCFsXk6v7_SqSr1Exh7AqcHINqS7h_72yoZks9G2ehFSwOOZ-ebzzGdmrWwHxJKkFUmPGza5TptybkTTbGot3kpCD-BqteXjSJIDdM6NliQHQk-EnlrOt60WtVZuq01L6Omd0FPyR4pxrouGREcSHUgWrc_ySRM5cDUJBJK81tJZQk_WCU5oRWi17Ujy7audzPRnO-ODfbJyaGFgPdqRcQTrBEmqu6_ARg4IPyCObqlXk4cOv7G_EOxkEFyHYA0H2zOlCH2Fayd5B5wNUCN0Wgmo38CxWqG3dmgQmLVTv7i-9HbGlxm50-Yl1JMc05g-4_iOQ-s6iLNHLLOWAhqtgdB9ozRzINA6klY_SHr0yaYhHJAAOTiQg8C_H4yLhzWcpNUSfjX4zQOhxZoqX0kBAPCkNgIb-Hmovn_7WLdzrbU6OxgNCskdkOQIdh6dmfBcJ5TQvQ-YVI8uAUBCzwHEvLooEZ-b4LCG8og8yCf3Rhtftsd6Ca4RSarw-grD-korb1hru9UDnwryv8-Yr51UqJxHfbYByMVDGJ4BLDVMclhKCOw-FjE9FxF2eFBwWWIFPkM3WUgo-D07IGkVwT8QpyQ9hg2vkMa0gTXxI2uuH9d8rlYraZ7Je7L5a8zWfUGx75wbuFcf-tmX5MdPfYDK4qeF0bC2Z8AVG1pQWo-wdLV8R0L3S8ufLWvQvX0cxi-d5DLbhBYPZ7d2-sXTFEgIfXzrcb_sn_8tYBDyJjI307ParFN4gJ8dAqv1jOCuGpRuJQdpwbIeQTZwvc80EOod_musg_FDJdqJGXGXEB_QqwjgoKe2A6fv-vEnGGyldWjCPGiYLHplDTpj2YzAQGnmJ33h3ssvbESZiCIp2AbLOKdxkWV5Fm26smY0KXYRLVidJnmDnBcYx0VU5wKzfN9sZEkjuotymkW7KIuibZ7XWZwlMU0KTvk-J7sIeybVVqm59_K9kdZOWMZRlO2LjWI1KhsuBkoHvEKwEkr9PWFK7_RST60lu0hJ6-xHGCedCjfK4WB4l-38Af8etNqX6zTo0clevqPvcWmQO-ix1-YNGOdo7WYyqny6aKTrpnrLdU_oySda_15Goy_o-_8U4Pm7ZMU_l_TfAAAA__99JgBp">