<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/64986">64986</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [AArch64][SVE] Faster unsigned narrowing to improve the parallelism degree of loop iteration processing
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          vfdff
      </td>
    </tr>
</table>

<pre>
    Test case: https://godbolt.org/z/1bo7TrYWj
```c
/*
 * Reproducer: stores the rounding average of two byte buffers into dst,
 * row by row: dst[x] = (src1[x] + src2[x] + 1) >> 1.
 *
 * dst, src1, src2   - pointers to the current row of each buffer; all three
 *                     are declared __restrict.
 * i_dst_stride,
 * i_src1_stride,
 * i_src2_stride     - number of uint8_t elements added to the matching
 *                     pointer after each row.
 * i_width           - elements processed per row (inner loop bound).
 * i_height          - number of rows (outer loop bound).
 *
 * Nothing is written when i_width or i_height is not positive.
 */
void pixel_avg(uint8_t *__restrict dst, int i_dst_stride,
               uint8_t *__restrict src1, int i_src1_stride,
               uint8_t *__restrict src2, int i_src2_stride,
               int i_width, int i_height )
{
    for( int y = 0; y < i_height; y++ )
    {
        for( int x = 0; x < i_width; x++ )
            /* The uint8_t operands are promoted to int, so the sum plus the
             * rounding bias cannot wrap; after the shift the value is at most
             * 255 and is stored back as uint8_t. This widen/add/shift/narrow
             * pattern is what the vectorizer has to recognise. */
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        /* Advance each pointer to its next row by its own stride. */
 dst += i_dst_stride;
        src1 += i_src1_stride;
        src2 += i_src2_stride;
    }
}
```

This may be similar to the new **faster unsigned narrowing** feature in [GCC 12](https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/gcc-12).
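 * **LLVM**: computes 8 channels per iteration (the bytes are unpacked into `.h` halfword lanes, so only half of the vector's byte capacity is used)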
 ```
.LBB0_12: // Parent Loop BB0_4 Depth=1
  // => This Inner Loop Header: Depth=2
  ld1b { z0.h }, p1/z, [x2, x18]
  ld1b { z1.h }, p1/z, [x4, x18]
  urhadd z0.h, p1/m, z0.h, z1.h
  st1b { z0.h }, p1, [x0, x18]
  add x18, x18, x16
 cmp x17, x18
  b.ne .LBB0_12
```
 * **GCC**: computes 16 channels per iteration (it operates directly on `.b` byte lanes, with no widening)
```
.L5:
  ld1b z31.b, p7/z, [x2, x1]
  ld1b z30.b, p7/z, [x4, x1]
  urhadd z31.b, p6/m, z31.b, z30.b
  st1b z31.b, p7, [x0, x1]
  incb x1
  whilelo p7.b, w1, w6
  b.any .L5
```
 

</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJyclV2PqzYTxz-NczNKhA0k5CIXYbPR80jnolKPWvUqMvYAPjIY2WaT7KevDCEvm2wrFa0WPJ75xf7PeMydU1WLuCFpTtLdjPe-NnbzUcqynBVGnjcenSfxFmrvO0fiLWF7wvaVkYXRfmFsRdj-k7A9Lczqp_3rz18k2pFoS5bR5W8YfhgloVMn1Af-URGW9ar12cEDYdvDwaLzVgkP0nnC3kC1HtRBOn8IdomEvY0YeHxeQ5wV9EYJo_-KYQ8Y9i-Y0fGopK9vcTWqqg7o9UWXVX4LLo0lLBs8z0DiHUQkzofPt2voYCEsJyy_UULwA-kL7XSjnS60cV3B8Ao2PSEBaX4i6W4gBN4g52Rj-ajL3ZgGEpD4ncTvQEk8rUq6sO08YB5yGX9ZduDfHO_T9cKTPXiyV55ktZuk3r2sxfG_r5WDhp-hQHCqUZpb8AZ8jdDiMdQCYduSO48W-nY4JRJabq05qrYap6FE7nuLoFogaV4JQRlJd4Rlj6dFmKbpW-XPC26bhTANYXtum_nVPi-0qRxh-4KwvTdGu7kzpT9yi3Ml0Q3zhO0743xwq4SYU3bLYFjLuCStP5rxKxxaYZqu9wgZiJq3LWoHyEUNyqPlXpl2in9UaPEjz6MDZQEx7gB-4xZbDz-M6SBMJrDDLhTUjk7SXzxJvAuV8DPo-_-2RTsG_Q-5RBuIUyCbArWkRShn-IwW9ZA_9gYdHRvLW1D2NJzEE82Cuk9R9Luo5DmqtzWXcvilq38TviZLoE3Ozn-zsBEfPeMDO1guE8NreZkUTQcnuprmLhHFokW4Cv6qXu-yWwnxnFy6_OfsPiU3DYV5r-JnTBfFsLfVC9G_av4ZR6-8kyfvSesrfXkVezKNrHu5H9byoPMdWbWiCJbL8FgrjdpAtxpDj0OKjsubxrw9Q9j4a32H10xuYrmO13yGG7pcs_UyYatsVm9SnsSSizhjScKKhMZ8lUgpM5pkMharcqY2LGJxlLGU0pTSbJEJISlFlHQdybhEkkTYcKUX4XSGO3OmnOtxs0zW2XKmeYHaTTew3QSnedFXjiSRVs67W5hXXg939XZrRb1MgiRp_vsf76EZ779rVqGvqaaz5gOH_tZxy7VGrVwDEiuLCKYEHU7ptXSgs0agc6qtZr3Vmy_Xv_J1X1w62dhyhte8s-YXCk_Yfthh6FXDJv8OAAD___uuWHQ">