<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/55167>55167</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[LoopVectorizer] Miscompile due to incorrect tail computation
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
max-quazan
</td>
</tr>
</table>
<pre>
I hope I can explain this correctly. My original repro is in Java, so I can only provide a reduced test, which I believe still shows the bug.
Run `opt -loop-vectorize -S` on the following test:
```
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "x86_64-unknown-linux-gnu"
define void @test() #0 {
bb:
br label %bb1
bb1: ; preds = %bb3, %bb
%tmp = phi i32 [ 6, %bb ], [ %tmp6, %bb3 ]
%tmp2 = phi i32 [ 35902, %bb ], [ %tmp5, %bb3 ]
br i1 true, label %bb3, label %bb11
bb3: ; preds = %bb13, %bb11, %bb1
%tmp4 = phi i32 [ %tmp, %bb1 ], [ 9, %bb13 ], [ 9, %bb11 ]
%tmp5 = phi i32 [ %tmp2, %bb1 ], [ %tmp16, %bb13 ], [ %tmp12, %bb11 ]
%tmp6 = add nuw nsw i32 %tmp, 1
%tmp7 = icmp ult i32 %tmp, 181
br i1 %tmp7, label %bb1, label %bb8
bb8: ; preds = %bb3
%tmp9 = phi i32 [ %tmp4, %bb3 ]
%tmp10 = phi i32 [ %tmp5, %bb3 ]
store atomic i32 %tmp9, i32 addrspace(1)* undef seq_cst, align 4
unreachable
bb11: ; preds = %bb1
%tmp12 = add i32 %tmp2, undef
br i1 undef, label %bb3, label %bb13
bb13: ; preds = %bb11
%tmp14 = add i32 %tmp12, undef
%tmp15 = xor i32 undef, 1
%tmp16 = add i32 %tmp14, %tmp15
br label %bb3
}
attributes #0 = { "target-features"="+sse4.2" }
```
Some explanation: we end up storing the value `%tmp9`, which is effectively equal to `%tmp4` and `%tmp`. `%tmp` is a loop counter that starts at 6 and goes up to 181, so its value on the last iteration of the original loop is 181. Here is what the vectorizer produces:
```
; ModuleID = './reduced.ll'
source_filename = "/home/mkazantsev/work/sandbox/azlinux/jdk17/x86_64/fastdebug/lib/server/boilerplate.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "x86_64-unknown-linux-gnu"
define void @test() #0 {
bb:
br i1 false, label %scalar.ph, label %vector.ph
vector.ph: ; preds = %bb
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ <i32 35902, i32 0, i32 0, i32 0>, %vector.ph ], [ %predphi9, %vector.body ]
%vec.phi1 = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %predphi10, %vector.body ]
%offset.idx = add i32 6, %index
%0 = add i32 %offset.idx, 0
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %0, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
%1 = add i32 %offset.idx, 4
%broadcast.splatinsert2 = insertelement <4 x i32> poison, i32 %1, i32 0
%broadcast.splat3 = shufflevector <4 x i32> %broadcast.splatinsert2, <4 x i32> poison, <4 x i32> zeroinitializer
%2 = add <4 x i32> %vec.phi, undef
%3 = add <4 x i32> %vec.phi1, undef
%4 = add <4 x i32> %2, undef
%5 = add <4 x i32> %3, undef
%6 = add <4 x i32> %4, undef
%7 = add <4 x i32> %5, undef
%predphi = select <4 x i1> zeroinitializer, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, <4 x i32> %broadcast.splat
%predphi4 = select <4 x i1> zeroinitializer, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, <4 x i32> %broadcast.splat3
%predphi5 = select <4 x i1> zeroinitializer, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, <4 x i32> %predphi
%predphi6 = select <4 x i1> zeroinitializer, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, <4 x i32> %predphi4
%predphi7 = select <4 x i1> zeroinitializer, <4 x i32> %6, <4 x i32> %vec.phi
%predphi8 = select <4 x i1> zeroinitializer, <4 x i32> %7, <4 x i32> %vec.phi1
%predphi9 = select <4 x i1> zeroinitializer, <4 x i32> %2, <4 x i32> %predphi7
%predphi10 = select <4 x i1> zeroinitializer, <4 x i32> %3, <4 x i32> %predphi8
%index.next = add nuw i32 %index, 8
%8 = icmp eq i32 %index.next, 176
br i1 %8, label %middle.block, label %vector.body, !llvm.loop !0
middle.block: ; preds = %vector.body
%bin.rdx = add <4 x i32> %predphi10, %predphi9
%9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)
%cmp.n = icmp eq i32 176, 176
%10 = extractelement <4 x i32> %predphi6, i32 3
br i1 %cmp.n, label %bb8, label %scalar.ph
scalar.ph: ; preds = %bb, %middle.block
%bc.resume.val = phi i32 [ 182, %middle.block ], [ 6, %bb ]
%bc.merge.rdx = phi i32 [ 35902, %bb ], [ %9, %middle.block ]
br label %bb1
bb1: ; preds = %bb3, %scalar.ph
%tmp = phi i32 [ %bc.resume.val, %scalar.ph ], [ %tmp6, %bb3 ]
%tmp2 = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %tmp5, %bb3 ]
br i1 true, label %bb3, label %bb11
bb3: ; preds = %bb13, %bb11, %bb1
%tmp4 = phi i32 [ %tmp, %bb1 ], [ 9, %bb13 ], [ 9, %bb11 ]
%tmp5 = phi i32 [ %tmp2, %bb1 ], [ %tmp16, %bb13 ], [ %tmp12, %bb11 ]
%tmp6 = add nuw nsw i32 %tmp, 1
%tmp7 = icmp ult i32 %tmp, 181
br i1 %tmp7, label %bb1, label %bb8, !llvm.loop !2
bb8: ; preds = %middle.block, %bb3
%tmp9 = phi i32 [ %tmp4, %bb3 ], [ %10, %middle.block ]
%tmp10 = phi i32 [ %tmp5, %bb3 ], [ %9, %middle.block ]
store atomic i32 %tmp9, i32 addrspace(1)* undef seq_cst, align 4
unreachable
bb11: ; preds = %bb1
%tmp12 = add i32 %tmp2, undef
br i1 undef, label %bb3, label %bb13
bb13: ; preds = %bb11
%tmp14 = add i32 %tmp12, undef
%tmp15 = xor i32 undef, 1
%tmp16 = add i32 %tmp14, %tmp15
br label %bb3
}
; Function Attrs: nocallback nofree nosync nounwind readnone willreturn
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #1
attributes #0 = { "target-features"="+sse4.2" }
attributes #1 = { nocallback nofree nosync nounwind readnone willreturn }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = distinct !{!2, !3, !1}
!3 = !{!"llvm.loop.unroll.runtime.disable"}
```
Note that the vector body branches to `middle.block`, which in turn has the following exit condition:
```
%cmp.n = icmp eq i32 176, 176
%10 = extractelement <4 x i32> %predphi6, i32 3
br i1 %cmp.n, label %bb8, label %scalar.ph
```
and then
```
bb8: ; preds = %middle.block, %bb3
%tmp9 = phi i32 [ %tmp4, %bb3 ], [ %10, %middle.block ]
%tmp10 = phi i32 [ %tmp5, %bb3 ], [ %9, %middle.block ]
store atomic i32 %tmp9, i32 addrspace(1)* undef seq_cst, align 4
unreachable
```
So the stored value is now `%tmp9`, which is equal to `%10` when control arrives from `middle.block` (and `%cmp.n` is always true, so it always does). Now let's see how `%10` is computed:
```
%10 = extractelement <4 x i32> %predphi6, i32 3
%predphi6 = select <4 x i1> zeroinitializer, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, <4 x i32> %predphi4
%predphi4 = select <4 x i1> zeroinitializer, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, <4 x i32> %broadcast.splat3
```
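Both selects have an all-false mask (`zeroinitializer`), so each one passes through its last operand; effectively `%predphi6` is `%broadcast.splat3`. Note that the vector loop exits when `%index.next` = 176, meaning that `%index` on the last iteration is 176 - 8 = 168. Now,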
```
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] // = 168
%offset.idx = add i32 6, %index // 174
%1 = add i32 %offset.idx, 4 // 178
%broadcast.splatinsert2 = insertelement <4 x i32> poison, i32 %1, i32 0
%broadcast.splat3 = shufflevector <4 x i32> %broadcast.splatinsert2, <4 x i32> poison, <4 x i32> zeroinitializer
```
So effectively `%broadcast.splat3` is a splat of 178, and lane 3 of it, which is extracted into `%10`, is 178 as well.
As a result, we now store 178 instead of the expected 181.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJztWltz27YS_jXyC0Yc3kRRD3pI4nomndM-nMz0NQOSkIgaAhQClGT_-i5A8E7Kduw26dQZRyLAvWGx3P20YCKyh-1nlIsjQZ9RijkilyPDlCOVU4lSURQkVexh4X9ChwckCrqnHDNUkGMhEFAA5a_4hBHmmRUgOHtAcPdEM4IwUGZlSjKkiFTonNM0BzoQzu81u1SUMSRzcaZ8D9MEJeXeWbi3C_dD9fn_EkQeFVoyIY7LE1gDRjwStPwCmgzHTjBm-UHHIrCMi8i1f2aocLEnCmVYYYYfRKnQIrhFC98nywPwkOXRX7twEfjmQw-9_lBfRaH5WNL6YhdrJs-PlzzWF1HNA_e-mGmqp4G_vhHb_yDU3PPBhp6JqqBHRmrzLnH0FWSV_J6LM18yysvLcs_Lhqv6zMiOcoJOgmZoEbrGE3688DcgI3DRYv2xokuSxkEIJQViOCEMaFZJ4nXl6WEAJC_5twg-wsaTTFrbQWagA8dc1TphoA5HQ3HMKaKBjxarjyhqCGF4awYwWxG39wJzsyfJH4kKVhvXvyJuNS0OnEE9cH5J9P2OX4LB2Bs4KngLR3mtp0B-c9lfazhaazXf0ndXu2mng5l5b-zP1YwOf1pJddOLZnTZ2_5VlZFRibMM8fKMuDxXqpulDbywNuQ0hSgqmRrSxl5_Qy3PcA8H47i_pfHbxH7P7M2MY8Nr0e25M1wzQSwhO0LWVeJA045nzJbrITi5kEecQoyDo3yY_4BKDskDSfLta6qTxieEGd1zFNYyS14QnOY4YWSQIV6cIqYCf7BgvwmG1nwTPsbM_t5WU088rcHA6Bc_rlNGD60OJ6z2xmbbG9UzdhGFoW5WMRQaTQmtw8XImU7k9YrXt92lYwWFJSmhMtiSoBez1vHkV4VnuSNYlQWRurQEt_rT_yglCR1dotBA3KC8fhEHUmEHjhUVXDv5DDMAC8qjCUtdoE-YlVDZgMmGZeRaTABYgOx2UN3piQB-IN9KQBlKtLShptUoo5mBC6c30kIw0jgBgEvJFSmscAkLVBLtCnFAkRGyF-AGMAw06IwBLpUAZ4CmstAiC4YBs1CQY5aEaDXbgCCjCXSCBAd9AKk5gYcPJs45VqiBKgXKQNscLtHR9ZvISkY-39oAWzsL_87iJocxmKhIpSiLlHzdUUY4PjQIAYhz8D58He7xI-ZKkhMMzqK4hy8Jy03EBa7wowEPcPVndu9BRryrsAVc7GChGQHoBdeMJpqNFCdSwEUiQF0B-6qIsaWPVd7hVB9OQVLaYSb7EEKm4KPCOea92So-9GxHbzv5_CQ1kZ-m8oIVnQDin9Bopp-rc6SxK6RKT4PVmZxGIdFdRvXMHXIM8IPhcji5qD6hVjYomHDH0aJrFYvgU4guWtMi-KWSF3zSehuIqAfuxEXwyxNW6eWDis1zbfKuGAUpQlBOFYXC-6gfuudo9twnVYvdThLl0OzSKyQ1VjOO7ZC7w3LT8msOt0OaFAJnKSQNR-rMQDmkiyoDVJeEkQPharDWo6ASKoN1stbYOnxWtpEq83K3Y6Ra6tCDM-aYVc7o788P_d_a4l13SfiUS_yX-8R7hk-C73aK_xZeaSHaSKmN9ingEzzJ5U2xhfNsk_hqNU8fTNFH8_ThFP16nn41RW8f12rHIAbSNgK8KTeP9sGmrAbCT13YdPVECIytCn8Gs4KxXasfYpdVPjYn-pHmhGN71q-wByJ-crZ-ckfK4lcpW19T5o21bV6lbSK9dbw21mZ_Xn-vuuCauniIfAyG6bU6bMqvCjGI6rLEbZeDfOtRNljIW0ejXkfcw5gHmmWMOAkT6f0U-GwBm8fY6eCYXzQwcLsIsSfkmRDxKkLs1DXKnaIDT-Z82YCdBna1MqqIAYjNKi-FrlmKVVj9jHJAuHMKtVxA8aN8VFmh2yGtWHC8
w0d7oF3e97wu2VUUwa4UOJ2p851kUj_4wWjzjM5RZ2r6p0Rng9rJ1_1oqFzc2-7OVqXgS1keiAM_kUcg3ov9CfYuch00eHuCDwR-qTWB8Oxe7mZO5T_e3h5sy2yXe-jHIfure98Dfz4t_70Z_t4MHzfDJ0qC39_nN-iQD4vT4hVN89bDTamYywqLlzXWn59u3nvv7733md67XsZdyVPTRv6gVKHbwYgLjVoSDGHExa4gBL7kA0_hq-RnwHoINj_jghN0powVRJUFr_uTKWR08j2IxzYxe3n7zc4G-oK8RtB3LXV05ODbpzajUlGuEbvv6VasXz_0EBsNi-9Z9TUNGNpkNIfKpj-fmdVUz2iP359RZhN4XaX6SoMrSuFpFow5RckVBQAAks2DDeobAYPDAfP5u1AEqfZMAZlenznCqE5Iepm0PVLhyHgxx3LwngS5UIVSwTNqj2omlf-LYHDfcH3AAwvmkzffS9cPK13DE0MTlUZ1Zs_dqISEcJ49IOwfCnquOQH8HRgYAQvWEqwhKG8EGAJkXmM6HCEnZVcj_bXhu_hp20U_U5dvHATdY19IS7Bd1e6N-O1mm0Rg95sPM6NBqzq96UNYwrWoTs8kqrbYZq8Dwbx660yfMlhCTTNz-is1I1qiqjPjRXFtznxM_T0nXjB1B3-1GS888am5vXU3VJ466Wi5uvr-G4ce12N2NlrN-416AJlH-633WiMI0ZnubBOgvq9dpgAFIbEzrxTcZNsg2wQbfKOoYmQL0fE_CO8_mvcKdCj8RqXObpQRlEECheQIWKV6axMpTJnNfSaEb8qCbXOljuZlBLOfe6ryMnGASJ_8A1CxX8tjIf4EGTCkUpYa992tVl60vsm3bpJFMV7Frpfu1tjbbLI0WpHYz9L1yl9nmxtTpqW2F6ANJ_ALVovQMGd1e0O3vuv7bgiFxA9XfuB43ipMkwhvEi_c-V4McJYcwHLH4CZR7G-KrTEpKfdSY13AY7K9iaWEskOMe7R8XKpcFNsDviyhXDxifmO0b431fwFte5ZF">