<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/71056">71056</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[LV] Adjust the pipeline to eliminate unnecessary sext instructions.
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
erickq
</td>
</tr>
</table>
<pre>
I was recently working on a case where I specified -march=armv8-a+sve and expected SVE assembly to be generated, but the loop could not be vectorized because the cost model thought it would require a lot of SCEV checks.
```
// Descriptor for one block of work. residual() uses read.box to select a box,
// read.{i,j,k} as the loop lower bounds and dim.{i,j,k} as the loop extents.
// The type is aligned to 64 bytes through the aligned attribute.
typedef struct {
int subtype;
// Extent of the block in each of the three index directions.
struct {int i, j, k;}dim;
// Two descriptors with identical layout, named read and write: a box index,
// an (i,j,k) triple, two strides and a restrict-qualified data pointer.
struct {int box, i, j, k, jStride, kStride;double * __restrict__ ptr;}read,write;
} __attribute__((aligned(64))) blockCopy_type;
// Per-box record. residual() reads jStride, kStride, ghosts and vectors from it.
typedef struct {
int global_box_id;
struct {int i, j, k;}low;
int dim;
// Multiplied by (1+jStride+kStride) in residual() to offset each vector's base pointer.
int ghosts;
// jStride and kStride are the multipliers applied to j and k in residual()'s linear index.
int jStride,kStride,volume;
int numVectors;
// Array of data pointers; residual() indexes it by vector id.
double ** __restrict__ vectors;
} box_type;
// Top-level record passed to residual(). Of its fields, residual() reads only
// h, use_offload, num_my_blocks, my_blocks and my_boxes; the remaining fields
// are not referenced by the code shown here.
typedef struct {
// residual() derives its scale factor from this as 1.0/(h*h).
double h;
int active;
int num_ranks;
int my_rank;
int box_dim;
int box_ghosts;
int box_jStride,box_kStride,box_volume;
int numVectors;
int tag;
struct {int i, j, k;}boxes_in;
struct {int i, j, k;}dim;
int * rank_of_box;
int num_my_boxes;
// Array of boxes; residual() indexes it with a block's read.box.
box_type * my_boxes;
double ** __restrict__ vectors;
int allocated_blocks;
// Number of entries of my_blocks that residual() iterates over.
int num_my_blocks;
// Array of block descriptors iterated by residual().
blockCopy_type * my_blocks;
// Nested record holding a type tag and three-element arrays of block lists.
struct {
int type;
int allocated_blocks[3];
int num_blocks[3];
blockCopy_type * blocks[3];
} boundary_condition;
double dominant_eigenvalue_of_DinvA;
int must_subtract_mean;
double * __restrict__ RedBlack_FP;
int num_threads;
// When non-zero, residual() takes its offload branch instead of the host loops.
int use_offload;
int um_access_policy;
double *chebyshev_c1, *chebyshev_c2;
int Krylov_iterations;
int CAKrylov_formations_of_G;
int vcycles_from_this_level;
} level_type;
// Forward declaration of residual(); the definition follows immediately.
void residual(level_type * level, int res_id, int x_id, int rhs_id, double a, double b);
// For every block in level->my_blocks, evaluates the stencil expression Ax at
// each (i,j,k) point of the block and stores res[ijk] = rhs[ijk] - Ax.
//
// level  - supplies the block list, the per-box vectors and strides, and h.
// res_id - index into the box's vectors[] of the output array (written).
// x_id   - index into the box's vectors[] of the array the stencil reads.
// rhs_id - index into the box's vectors[] of the right-hand-side array (read).
// a      - not referenced anywhere in this body.
// b      - scales the whole stencil sum together with h2inv (as -b*h2inv).
//
// Nothing is returned. When level->use_offload is non-zero the function does
// nothing, because the only statement in that branch is commented out.
void residual(level_type * level, int res_id, int x_id, int rhs_id, double a, double b){
int block;
if (level->use_offload) {
// device_residual(level, res_id, x_id, rhs_id, a, b);
}
else {
// Blocks are distributed over OpenMP threads with a static schedule; the
// if() clause enables parallel execution only when num_my_blocks > 1.
#pragma omp parallel for private(block) if(level->num_my_blocks>1) schedule(static)
for(block=0;block<level->num_my_blocks;block++){
// Index into level->my_boxes of the box this block reads from.
const int box = level->my_blocks[block].read.box;
// Half-open iteration range [lo, hi) per direction: the lower bound comes
// from read.{i,j,k} and the extent from dim.{i,j,k}.
const int ilo = level->my_blocks[block].read.i;
const int jlo = level->my_blocks[block].read.j;
const int klo = level->my_blocks[block].read.k;
const int ihi = level->my_blocks[block].dim.i + ilo;
const int jhi = level->my_blocks[block].dim.j + jlo;
const int khi = level->my_blocks[block].dim.k + klo;
int i,j,k;
const int jStride = level->my_boxes[box].jStride;
const int kStride = level->my_boxes[box].kStride;
const int ghosts = level->my_boxes[box].ghosts;
// Reciprocal of h squared; a factor of the whole stencil sum.
const double h2inv = 1.0/(level->h*level->h);
// Every array pointer below is advanced by ghosts*(1+jStride+kStride) elements.
// NOTE(review): presumably so that index 0 addresses the first non-ghost
// cell - confirm against the code that allocates the vectors.
const double * __restrict__ x = level->my_boxes[box].vectors[ x_id] + ghosts*(1+jStride+kStride);
const double * __restrict__ rhs = level->my_boxes[box].vectors[ rhs_id] + ghosts*(1+jStride+kStride);
// Slots 6, 7 and 8 of vectors[] are hard-coded here.
// NOTE(review): presumably fixed ids of the beta coefficient arrays - confirm
// against the project's vector-id definitions.
const double * __restrict__ beta_i = level->my_boxes[box].vectors[6] + ghosts*(1+jStride+kStride);
const double * __restrict__ beta_j = level->my_boxes[box].vectors[7] + ghosts*(1+jStride+kStride);
const double * __restrict__ beta_k = level->my_boxes[box].vectors[8] + ghosts*(1+jStride+kStride);
double * __restrict__ res = level->my_boxes[box].vectors[ res_id] + ghosts*(1+jStride+kStride);
for(k=klo;k<khi;k++){
for(j=jlo;j<jhi;j++){
for(i=ilo;i<ihi;i++){
// Linear element index, computed in int and used (with int offsets) in
// every subscript below.
// NOTE(review): on a 64-bit target each subscript needs this int widened to
// pointer width, which is presumably where the sext instructions named in
// the issue title come from - confirm in the generated IR.
int ijk = i + j*jStride + k*kStride;
// Ax = -b*h2inv * ( c*S1 + 0.25*c*S2 ) with c = 0.0833333333333333333 (about 1/12):
//   S1 - six terms; each is a beta value (at ijk or one step along its own
//        direction) times 15*(difference of neighbouring x values) minus a
//        wider x difference reaching up to two steps away.
//   S2 - twelve terms; each is a difference of two beta values times a
//        four-point combination of x values.
double Ax = ( -b*h2inv*( ( 0.0833333333333333333)*( + beta_i[ijk ]*( 15.0*(x[ijk-1 ]-x[ijk]) - (x[ijk-2 ]-x[ijk+1 ]) ) + beta_i[ijk+1 ]*( 15.0*(x[ijk+1 ]-x[ijk]) - (x[ijk+2 ]-x[ijk-1 ]) ) + beta_j[ijk ]*( 15.0*(x[ijk-jStride]-x[ijk]) - (x[ijk-2*jStride]-x[ijk+jStride]) ) + beta_j[ijk+jStride]*( 15.0*(x[ijk+jStride]-x[ijk]) - (x[ijk+2*jStride]-x[ijk-jStride]) ) + beta_k[ijk ]*( 15.0*(x[ijk-kStride]-x[ijk]) - (x[ijk-2*kStride]-x[ijk+kStride]) ) + beta_k[ijk+kStride]*( 15.0*(x[ijk+kStride]-x[ijk]) - (x[ijk+2*kStride]-x[ijk-kStride]) ) ) + 0.25*( 0.0833333333333333333)*( + (beta_i[ijk +jStride]-beta_i[ijk -jStride]) * (x[ijk-1 +jStride]-x[ijk+jStride]-x[ijk-1 -jStride]+x[ijk-jStride]) + (beta_i[ijk +kStride]-beta_i[ijk -kStride]) * (x[ijk-1 +kStride]-x[ijk+kStride]-x[ijk-1 -kStride]+x[ijk-kStride]) + (beta_j[ijk +1 ]-beta_j[ijk -1 ]) * (x[ijk-jStride+1 ]-x[ijk+1 ]-x[ijk-jStride-1 ]+x[ijk-1 ]) + (beta_j[ijk +kStride]-beta_j[ijk -kStride]) * (x[ijk-jStride+kStride]-x[ijk+kStride]-x[ijk-jStride-kStride]+x[ijk-kStride]) + (beta_k[ijk +1 ]-beta_k[ijk -1 ]) * (x[ijk-kStride+1 ]-x[ijk+1 ]-x[ijk-kStride-1 ]+x[ijk-1 ]) + (beta_k[ijk +jStride]-beta_k[ijk -jStride]) * (x[ijk-kStride+jStride]-x[ijk+jStride]-x[ijk-kStride-jStride]+x[ijk-jStride]) + (beta_i[ijk+1 +jStride]-beta_i[ijk+1 -jStride]) * (x[ijk+1 +jStride]-x[ijk+jStride]-x[ijk+1 -jStride]+x[ijk-jStride]) + (beta_i[ijk+1 +kStride]-beta_i[ijk+1 -kStride]) * (x[ijk+1 +kStride]-x[ijk+kStride]-x[ijk+1 -kStride]+x[ijk-kStride]) + (beta_j[ijk+jStride+1 ]-beta_j[ijk+jStride-1 ]) * (x[ijk+jStride+1 ]-x[ijk+1 ]-x[ijk+jStride-1 ]+x[ijk-1 ]) + (beta_j[ijk+jStride+kStride]-beta_j[ijk+jStride-kStride]) * (x[ijk+jStride+kStride]-x[ijk+kStride]-x[ijk+jStride-kStride]+x[ijk-kStride]) + (beta_k[ijk+kStride+1 ]-beta_k[ijk+kStride-1 ]) * (x[ijk+kStride+1 ]-x[ijk+1 ]-x[ijk+kStride-1 ]+x[ijk-1 ]) + (beta_k[ijk+kStride+jStride]-beta_k[ijk+kStride-jStride]) * (x[ijk+kStride+jStride]-x[ijk+jStride]-x[ijk+kStride-jStride]+x[ijk-jStride]) ) ) );
// The only store in the loop nest: right-hand side minus the stencil value.
res[ijk] = rhs[ijk]-Ax;
}}}
}
}
}
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy8WV1v6jwS_jXmxgIRp6FwwQUpb1dHuxerXem9jRzHECdOzLGdtOyvX9lJyDdNux9HlGPsyTPPPJ4ZuwUrxa45pUfg-cA7r3ChYyGPVDKS_l6FIroff8EPrOCHkCnLr1DkEEOCFYWSEpprfgfoDeI8gh8xzeEvqG6UsAujEVxnWJIYuGcss3K_xgD5qqTG3EDmmkZQCxhSiENOzfBKcyqxphBDVVKIlaJZWDkICw1_QSIKHuUAvWpYUqKFZP-ikGkYUoILRaGOKSRCaZiJiHKoY1FcY20sPsyTUNLfBZMGnwsNxQUqQktIYkpStQHbM9ieeu-7bf2yH_X9RiN6gUrLgmgIXv1qHkKWa6iK0BgAt5ltzcwyM1Ek5i01Jq_niGWt7cA4FJ_GsvuMGf1TSxZZAdN66PqRKIx6AJ1gEEiqtGREBwG8aVn5kRRHAL19SKZbcuD1DIMAay1ZWGgaBADtAdpjbpIhAmi_ewHoUL1gyAVJ38TtHvQCHL8_F-jKRYh5EIrPgEWzoY904uKjY2xMespZ5FgorQZWrVrpY1QKXmR0YJgX2Z82mboIrawjZcuBsdHSBPVTcWpX8YAWJpqVE1wDifN0GGx2t9ODWcOqn2bN7KRiZqFVzXxKe5-Wq2cWNL4uroVQfFIVsHx5VnTD6no1u2WUCMTFZNqEfNk9sO46S83u2adH6z9Jie4-ci4I1jQKbB0NZWooDdf6Rfdg1jcbi9XM1HvQa0hwhpDnu8A7j-wMsxmLCXJzWFV1FHmE5T0gIo-YZiKfrJOHxJHIWI5zHVB2pXmJeUHNfp5ZXp6GiV8oHZjWKzHRQUZxPlnE_e36B418jkkavP99cstM5Do2jXModdeqUIbVhQscDUgVWYAJoUoFN8EZuU9SIjEN7yqmZUAck9j9KTSA_Ku8c1EGTJsDkol8mEZvp9riImRWWRjJ_jIwK8mdcKqCixQmRKYCTkvKe73Mzkx1MwhLwcwhqlhUYA7QvjW1IldY5uDKtTEzfb7-9NkZy7hZqdXAnXFoDp3G7__L3-tEEtiEHqfHBTY81sD9o5sD6NAWIEDvAL2bSopoyQgNhkEYBi3jhm3L1FLsaWFLqRlSrmjXm3uT-JphKLIbvGGJOaccXoSEN8lKrClA-yoedIDs0o1g0H_-cIyJIjGNCm4eUxprRgyRyvVFyAeYe94C16_Hb3OQtQHy7evQa1JE5Eo3hw8Ebp19BqVF8GoE77wxNbnp9_UuCONiKQibgUiWQyQzEOlyiHQukJgtgIhYtmEQIN8EPhfPYqTEIiWzSOlipNQipQOk5hw3x_hc3PX1Y-zHnsieb7beO2-Sx_V3kugykPQpSHVD-gpjdI9qIJpLHWJ5aUGczdY2hbbyYoBO3Q-HeZjRCTZRK31izY3E86vm4p3tntR8zRVm7wDkP657_uOq16HxnIOMv5Snw6JubT_hsVSUkGocTOToHKfdz1RZxiNZzuP1f8kjXc5j_5_szmyW0G9lCf0vZEl1RJnjqWpB5nBKY2ZH41Oosk6Ae65aXwLct8RaJ2ProRMG3HPVexlw35h9jD17rG6DSbUtVfNOADo9Gp9pnACdpprT4_Z4qsofoD1chwCdbJepZLKT28127w7_2V_maxO_rhXg-ZaKd66XHM-0KTP8rNbWjlld15-s4QGuYccA9QyQ78Daqvrpu2rXp901608cAuT3XK6nHSYLYmvy6YsA2-3pR9pOznkfGM3GvIiICXySyvoJkXSBDOliGSYs21qc9z4wmpVhEZFahrHteopIRWa7QV7td0FtmPttrzx6G9RbGkl_gv3amdzbqcm10wfz5zZ3kl86x2-oyZjfF3va5ZdO8Rt6aPklLb-qqHuz3brtsWq7vDPVWoZZ3-D4E_1gkstQq2SRVuOz5yvBGn7fVC2dVC39SrV0oWrp
d1RL5ysgXVQBLaulZdDw-1ktVOHO1qtdfsp4AuBpmx4Bfp_rTO1W0M9ycgLgaS8dAS6v4d7la1zM7fJcfk4AzB35I6xFlT15PZyn-YWu3672Gejl9d691E4Ufrv8ROGFPWCMtagL9BzMtIMO9BdV9u3OMAM9W2-Pn8EdWtpfPaq7hb1Gy7idWJ_6f1oCr-f6Nfob3GPUDvpfF1bvq-joRgf3gFf06OwO-wPaO85-FR-9w-Gydwh6uYRhtNtf3Mg5bN2tt8MvePeK6Iod0Ra5jrNFDkJ7d7-5ODuM6JZcdi-Xw2HrgJctzTDjG87LbCPkdcWUKujx1dl6uxXHIeXKfq-LUE4_oF0ECAHvvJJH88w6LK4KvGw5U1q1KJppbr8Q_tufRqNTlBRK2-9Wb-xGOcvtl7WUs4zlWFNY5DklVCks71DRTw1ZXn0hwUSuNqtC8mOs9U0Bt_676JXpuAg3RGQAvRu39X_rmxQJJRqgd0tWAfRug_l3AAAA__-JNxDu">