<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=http://email.email.llvm.org/c/eJzNWF1vozgU_TXkxSIyNpDkIQ9Ju9VWGmmlndW-jkxwEs8QyGBDt_n1ezGGGDAZdVcjTZo2dnx8P8-9xk2K9H37is6s5qiSPEXqLCQ6FJcLy2FSoBPPeckURzU_qKJETEp-SbL3pYefPbxr_y498nLIWH5C_mekuFTB8oD8C89ZknGf_3PlpYCZYhlMFM-lKHIJAFYezh59Lus4PNX4GmDkK1aeuEKlkAf4Fvl_UOQXRmazJcvqC_J9ve7Dj7bKT4SS_kXkIIzgcN1a1RlyKECzyGX7bV2IVMv7wtLUI2uRK-SR3c4jT8iM99b4qRvnHtkYn1f7doDgdYSYGCkCgXqEPbrXwyfY0gw9stfvzX1T83ryor3wome9addPyB7tzQR2dwqf7Wh3KUn7ZICLKffozkYNXQRPjCPu0QZ2I9fLIxR5If6otIGrQIJb88log1h-2u_xlyAaYmSWCY0JG0yLpGSIaRGAiVyrsjQSVlpC6MIkJ17dLWnltPZQtyqFGwgeLn41n8YTs7WT06VhxjNt3cgsCKzlGCPGATcm1qv4Eaa1mgUujMxUZclpdcUzmNDSNUpY0x8sOfFDXeFdl8IzmMiyeU5XeE_tGAN1aGOc9jzMapIbloYWS2do0WKCh4l2ErC3QPsZT_MnrJhqCf4YNORwK6i1Nv7vHMY2h-MJh7sEGGKtXKS51EadM83dak9x92poSOBepYaOlq2rucZlvXQPo9BPf_urOd1ec-if6FNRXNHvnKW8bEQ886tqzqKR6lpyVUMgb7ws7uHmVPt40RPF9LgabcwAtKxRvW5WoW02xm9mMJsOE04xwIll3clpkfV6YuRIVzSR01OL3vsrieIZkMXgWZDJM3GStJcU3RvtRFJXca2MLp9DCP-uP23iuQ8QoKjt4MrZ_vn36UE0aN9QCGPuD9k35vUoZLFLrbN6Jq0yswNGH50S9gkwjkI66JTubjo8bZwFKavEctrt-rCXdU6bOAY_rsufVpZH8pG6jH-NurSO0YCsf1yX8yArZRMmOspyIqg_CO0qCT5al311zT4PdaR6wHRiPaPQGUxgMX0Og62KoQ-ZvrKZbnwIfyqXszdjbNge00ATPKFJD4p6UODi0ttdkvsxSU61kXlSWvGfa_BW-OcgFiH9YAZjZWj8vOOiYzhIUNSEd7ir5Mp8cVbqKhsWkhd4n4o0KTK1LMoTzG7wGxzXxS1a3Zh9e0KvEq7CvOTQvN_RG3tv7sLFVYmLuPHukpzy7noM91mPvqBrxpnk6MyzaytmkW5puqEbtmCVOhflVgpw-JtkVcnqRVVm25F1Qp2rZAn3b5g0V13z4V_L4itcdWEqpKy4hEFEIxIsztvjcRWt0-SY0k3IV8kRh-k6ijFJE0yTdUQWGUt4Jrdwq4RL5eL_qxRbggm8cRAEBNNgSTg7xuGBxIdVtDmQI9wX-YWJbNnIaSK9KLdaZFKdJCxmQip5X4RbrDjlnHcWKqEyvv3z9fOT_zeCuBXmfw_ipgPdBj4vVJ-PdKEt3Grz_gU4LRpq>53521</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
RISC-V auto vectorization code not optimized
</td>
</tr>
<tr>
<th>Labels</th>
<td>
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
siddiksaurav
</td>
</tr>
</table>
<pre>
I used the following command to generate the vectorized assembly:
./clang -S test1.c -menable-experimental-extensions -march=rv64gv0p10 -target riscv64 -O3 -o test1.s -mllvm --riscv-v-vector-bits-min=2048
test1.c contains:
void test_add(int *A, int *B, int *C, int n)
{
for (int i = 0; i &lt; n; i++)
C[i] = A[i] + B[i];
}
generated assembly code:
test_add(int*, int*, int*, int): # @test_add(int*, int*, int*, int)
blez a3, .LBB0_15
slli a4, a3, 32
li a5, 32
srli a7, a4, 32
bgeu a3, a5, .LBB0_3
li t0, 0
j .LBB0_13
.LBB0_3:
slli a4, a7, 2
add a5, a2, a4
add a6, a0, a4
add t0, a1, a4
sltu a6, a2, a6
sltu a4, a0, a5
and a6, a6, a4
sltu a4, a2, t0
sltu a5, a1, a5
and a4, a4, a5
or a4, a6, a4
li t0, 0
bnez a4, .LBB0_13
li a4, 1
slli a4, a4, 32
li t1, 64
addi a6, a4, -64
bgeu a3, t1, .LBB0_6
li t0, 0
j .LBB0_10
.LBB0_6:
and t0, a7, a6
mv t2, t0
mv a5, a2
mv a4, a1
mv a3, a0
.LBB0_7: # =>This Inner Loop Header: Depth=1
vsetvli zero, t1, e32, m1, ta, mu
vle32.v v8, (a3)
vle32.v v9, (a4)
vadd.vv v8, v9, v8
vse32.v v8, (a5)
addi a3, a3, 256
addi a4, a4, 256
addi t2, t2, -64
addi a5, a5, 256
bnez t2, .LBB0_7
beq t0, a7, .LBB0_15
andi a3, a7, 32
beqz a3, .LBB0_13
.LBB0_10:
mv a3, t0
addi a4, a6, 32
and t0, a7, a4
slli a5, a3, 2
add a6, a0, a5
add a4, a1, a5
add a5, a5, a2
sub a3, a3, t0
li t1, 32
.LBB0_11: # =>This Inner Loop Header: Depth=1
vsetvli zero, t1, e32, mf2, ta, mu
vle32.v v8, (a6)
vle32.v v9, (a4)
vadd.vv v8, v9, v8
vse32.v v8, (a5)
addi a6, a6, 128
addi a4, a4, 128
addi a3, a3, 32
addi a5, a5, 128
bnez a3, .LBB0_11
beq t0, a7, .LBB0_15
.LBB0_13:
slli a3, t0, 2
add a2, a2, a3
add a1, a1, a3
add a0, a0, a3
sub a3, a7, t0
.LBB0_14: # =>This Inner Loop Header: Depth=1
lw a4, 0(a0)
lw a5, 0(a1)
addw a4, a4, a5
sw a4, 0(a2)
addi a2, a2, 4
addi a1, a1, 4
addi a3, a3, -1
addi a0, a0, 4
bnez a3, .LBB0_14
.LBB0_15:
ret
https://godbolt.org/z/1f8oz57za
Is there any way to optimize this code generation? Please help.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzNV1uPozYU_jXkxSICG0jykIfJTEcdaaVK3aqvKwNO4q0DWWzYTn59D8YQAyarWWmlMpfY8edz_c4xTsv8ff-GzrRhqJYsR-rMJcrKy4UWMCnRiRWsooqhhmWqrBCVkl1S8b72ghcveOr-rz38mglanJD_GSkmVbjOkH9hBU0F89m_V1ZxmCkqYKJYIXlZSADQKjt75KVqkujUBNcwQL6i1YkpVHGZwbfI_4MgvzQy2y1CNBfk-3rdhx9tlZ9yJf0LL0AYDqJtZ1VvSFaCZl7I7tum5LmW94XmuYe3vFDIw09PHn5GZnywxs_9uPDwzvi8OXQDBM8RYmKkcATqUeCRgx4-w5Z26OGD_t3dN7XPsxcfuBe_6E1PwwQf0MFMYHev8MWOdp-SfEgGuJgzjzzZqLGL4IlxxD3awW7kejxMkBcFH5U2chVIcGs_KWkR60-HQ_AljMcYKQTXmKjFdEiCx5gOAZjYtSorI2GjJUQuTHpi9d2STk5nD3GrUkELCcaLX82n8cRs7eX0aVjwTFs3MQsCazlGsXHAjUn0avAI01lNQxdGClVbcjpdyQImsnRNEtb2B0tO8lBXdNelggVMbNm8pCu6p3aKgTq0MU57HmY1LQxLI4ulC7ToMOHDRDsJOFig_Uzm-eNWTLUEfwoac7gT1Fmb_DyHA5vDyYzDfQIMsTYu0lwao86Z5n51oLh7NTIkcK8SQ0fL1s1S47Ie3cMI9NPf_mpPt7cC-if6VJZX9DujOataES_sqtqzaKK6kUw1EMgbq8p7uBnRPl70RFE9ricbBYDWDWq27Sq0zdb43QJm12OiOQY4sW56OR2y2c6MnOiKZ3IGapF7f8VxsgCyGLwIMnnGTpIOkuJ7o51J6iuuk9Hncwxh3_SnTTz3AQIUtR3cONs_-zY_iEbtGwphyv0x-6a8noQscal1Vs-sVQo7YOTRKWGfANMo5KNO6e6m49PGWZCyTi2n3a6Pe1nvtIlj-OO6_GVlecQfqcvk_1GX1jEa4u2P63IZZKVsxkRHWc4EDQehXSXhR-tyqK7F96GeVA-Yjq13FLKACS2mL2ECq2LIQ6ZvbKYbH6JfymXx3Rgbdcc00CSY0WQAxQModHHp-12S-zVJzrXhZVJa8V9q8Fb4lyAWIf1wAWNlaPq-46JjNEpQ3IZ3vKtiynxxVuoqWxbiV_g9lXlaCrUuqxPMbvAXHrflLd7cqH17Qm8SrsKsYtC839F3-t7ehcur4hd-Y_0lOWf99Rjusx55RVfBqGTozMS1E7PK9yTfkR1dKa4E2__59vnZ_xvRGqR1V1d-07s7aUWpBiX5qq7EfmI8V-c6XcP1HCbtTdh8-Neq_AriYMqlrJmEQUxiHK7O-5hmu-gYp3GQb0jGkmMW4e0W42CbRzuc4ZWgKRNyD5dOuHOu-B4HsIiDMAxxQMI1ZvSYRBlOsk28y_ARLoPsQrlYt4rbMK6qvbYhrU8SFgWXSt4X4YrKTwVjvXxw_VxWe8kh8f9IWle0WWmb99rg_wAyIgSX">