<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/56319">56319</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
LoopVectorize miscompile caused by D126680
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
aeubanks
</td>
</tr>
</table>
<pre>
Caused by https://reviews.llvm.org/D126680 @fhahn
```
$ cat /tmp/a.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
; Function Attrs: nofree norecurse nosync nounwind uwtable
define void @wibble(ptr nocapture readnone %arg, ptr noalias nocapture readnone %arg1, ptr noalias nocapture readnone %arg2, ptr noalias nocapture readonly %arg3, ptr noalias nocapture readnone %arg4, ptr noalias nocapture readnone %arg5) local_unnamed_addr #0 {
bb:
%tmp = load ptr, ptr %arg3, align 8, !invariant.load !0, !dereferenceable !1, !align !2
%tmp6 = getelementptr inbounds ptr, ptr %arg3, i64 1
%tmp7 = load ptr, ptr %tmp6, align 8, !invariant.load !0, !dereferenceable !3, !align !2
br label %bb8
bb8: ; preds = %bb8, %bb
%tmp9 = phi i64 [ 0, %bb ], [ %tmp13, %bb8 ]
%tmp10 = getelementptr inbounds [128 x [3 x i8]], ptr %tmp, i64 0, i64 %tmp9, i64 0
%tmp11 = load i8, ptr %tmp10, align 1, !invariant.load !0, !noalias !4
%tmp12 = getelementptr inbounds [128 x i8], ptr %tmp7, i64 0, i64 %tmp9
store i8 %tmp11, ptr %tmp12, align 1, !alias.scope !4
%tmp13 = add nuw nsw i64 %tmp9, 1
%tmp14 = icmp eq i64 %tmp13, 128
br i1 %tmp14, label %bb15, label %bb8
bb15: ; preds = %bb8
ret void
}
attributes #0 = { nofree norecurse nosync nounwind uwtable "denormal-fp-math"="preserve-sign" "no-frame-pointer-elim"="false" }
!0 = !{}
!1 = !{i64 384}
!2 = !{i64 16}
!3 = !{i64 128}
!4 = !{!5}
!5 = !{!"buffer: {index:1, offset:0, size:128}", !6}
!6 = !{!"XLA global AA domain"}
$ ./build/rel/bin/opt -passes=loop-vectorize -S -mattr=-avx512pf,-tsxldtrk,+cx16,+sahf,-tbm,-avx512ifma,-sha,+crc32,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-amx-tile,-uintr,-gfni,+popcnt,-widekl,+aes,-avx512bitalg,-movdiri,+xsaves,-avx512er,-avxvnni,-avx512fp16,-avx512vnni,-amx-bf16,-avx512vpopcntdq,-pconfig,+clwb,+avx512f,+xsavec,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-kl,-movdir64b,-sse4a,+avx512bw,+clflushopt,+xsave,-avx512vbmi2,+64bit,+avx512vl,-serialize,-hreset,+invpcid,+avx512cd,+avx,-vaes,-avx512bf16,+cx8,+fma,+rtm,+bmi,-enqcmd,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+fxsr,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,-amx-int8,+movbe,-avx512vp2intersect,+xsaveopt,+avx512dq,+sse2,+adx,+sse3 -mcpu=skylake-avx512 /tmp/a.ll
```
The following diff in the `opt` output shows the issue (`<` is at head, `>` is with the patch reverted):
```
78c79,80
< br i1 true, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !9
---
> %26 = icmp eq i64 %index.next10, 120
> br i1 %26, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !9
```
Running simplifycfg + instcombine shows the problem more clearly: the block `vec.epilog.vector.body` runs only once instead of `120 / 8` times.
At head:
```
define void @wibble(ptr nocapture readnone %arg, ptr noalias nocapture readnone %arg1, ptr noalias nocapture readnone %arg2, ptr noalias nocapture readonly %arg3, ptr noalias nocapture readnone %arg4, ptr noalias nocapture readnone %arg5) local_unnamed_addr #0 {
iter.check:
%tmp = load ptr, ptr %arg3, align 8, !invariant.load !0, !dereferenceable !1, !align !2
%tmp6 = getelementptr inbounds ptr, ptr %arg3, i64 1
%tmp7 = load ptr, ptr %tmp6, align 8, !invariant.load !0, !dereferenceable !3, !align !2
%wide.vec8 = load <24 x i8>, ptr %tmp, align 16, !invariant.load !0, !noalias !4
%strided.vec9 = shufflevector &lt;24 x i8&gt; %wide.vec8, &lt;24 x i8&gt; poison, &lt;8 x i32&gt; &lt;i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21&gt;
store <8 x i8> %strided.vec9, ptr %tmp7, align 16, !alias.scope !4
br label %bb8
bb8: ; preds = %bb8, %iter.check
%tmp9 = phi i64 [ 120, %iter.check ], [ %tmp13, %bb8 ]
%tmp10 = getelementptr inbounds [128 x [3 x i8]], ptr %tmp, i64 0, i64 %tmp9, i64 0
%tmp11 = load i8, ptr %tmp10, align 1, !invariant.load !0, !noalias !4
%tmp12 = getelementptr inbounds [128 x i8], ptr %tmp7, i64 0, i64 %tmp9
store i8 %tmp11, ptr %tmp12, align 1, !alias.scope !4
%tmp13 = add nuw nsw i64 %tmp9, 1
%tmp14 = icmp eq i64 %tmp13, 128
br i1 %tmp14, label %bb15, label %bb8, !llvm.loop !7
bb15: ; preds = %bb8
ret void
}
```
With the patch reverted:
```
define void @wibble(ptr nocapture readnone %arg, ptr noalias nocapture readnone %arg1, ptr noalias nocapture readnone %arg2, ptr noalias nocapture readonly %arg3, ptr noalias nocapture readnone %arg4, ptr noalias nocapture readnone %arg5) local_unnamed_addr #0 {
iter.check:
%tmp = load ptr, ptr %arg3, align 8, !invariant.load !0, !dereferenceable !1, !align !2
%tmp6 = getelementptr inbounds ptr, ptr %arg3, i64 1
%tmp7 = load ptr, ptr %tmp6, align 8, !invariant.load !0, !dereferenceable !3, !align !2
br label %vec.epilog.vector.body
vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %iter.check
%offset.idx = phi i64 [ 0, %iter.check ], [ %index.next10, %vec.epilog.vector.body ]
%0 = getelementptr inbounds [128 x [3 x i8]], ptr %tmp, i64 0, i64 %offset.idx, i64 0
%wide.vec8 = load <24 x i8>, ptr %0, align 8, !invariant.load !0, !noalias !4
%strided.vec9 = shufflevector &lt;24 x i8&gt; %wide.vec8, &lt;24 x i8&gt; poison, &lt;8 x i32&gt; &lt;i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21&gt;
%1 = getelementptr inbounds [128 x i8], ptr %tmp7, i64 0, i64 %offset.idx
store <8 x i8> %strided.vec9, ptr %1, align 8, !alias.scope !4
%index.next10 = add nuw i64 %offset.idx, 8
%2 = icmp eq i64 %index.next10, 120
br i1 %2, label %bb8, label %vec.epilog.vector.body, !llvm.loop !7
bb8: ; preds = %vec.epilog.vector.body, %bb8
%tmp9 = phi i64 [ %tmp13, %bb8 ], [ 120, %vec.epilog.vector.body ]
%tmp10 = getelementptr inbounds [128 x [3 x i8]], ptr %tmp, i64 0, i64 %tmp9, i64 0
%tmp11 = load i8, ptr %tmp10, align 1, !invariant.load !0, !noalias !4
%tmp12 = getelementptr inbounds [128 x i8], ptr %tmp7, i64 0, i64 %tmp9
store i8 %tmp11, ptr %tmp12, align 1, !alias.scope !4
%tmp13 = add nuw nsw i64 %tmp9, 1
%tmp14 = icmp eq i64 %tmp13, 128
br i1 %tmp14, label %bb15, label %bb8, !llvm.loop !10
bb15: ; preds = %bb8
ret void
}
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJztGk1z2zru1zgXjjwS5Q_54EPStKd3ezNv9tahRMrihvooSdlOf_0CJGVLrp0m3fTtzL5kHBskAAIgQAKQnbf8efuJ9UZwkj-TytrOzNL7Gf0CLy32UhzMXKl9PW_1DqYeE7paZTGZLeKyYlVDZvHjLL6freLw8kO6IAWzBBhs3cE7gzU8yjK9E5ZwZpliz20PROkjEFIR1SBYRB1dxwCk1L3hMJkOEVot3FskB6DMkCmhWdRkCKwGHsD9CdMgYCLfatkpMcg-ZquvQLjTVuwXkZJNf4x2TX9iCu_pA_nSN4WVbUPurdW4U6RpSy0EfGhR9NogZJ6bAj765iAbTvqDZbkSfg0uStkIsm8lxz08yBxRNOusBo6CdbbXgmjBeNMC3YwuGe77J-IJmJLM3CZMXk1JX6ZsG_UcKNNXr7l4NeVyRjdEAVZ97ZuG1YJ_ZZxrwKYQWusHv1d5jpHoQIKMEErOYaplHOUM4kZ6gtxdQzIEZzSRzZ5pyRo7dywwEwcMF1qU8N8UAn2DU0lA-SUAoFPRKycbYkcoUYvGomTZ5OBlbm5oA9FJkukq61sWoIT_0oL0pgW5JorlQqGgPM_GQY1DiOI3_eFB6LQAu_35cWs62QBN7d04kq6SbjNmywcSnyhh-OgGMOupBxNwQYedrJXEL7kAVoFzTo4IpPAhM1zASzjv8eCWeACCmuf5qcjk7C-ZTZfyrvCbnfzUX8OZAHBxIYO-yixvz0SD9U1rggBj4VYCzpM1FxbQHy1wWs5N0Xbimq6p0xUOK2n6A2nM4XITL-I9WTgGWcDZFd9GxN7VeDOfY1QmJy5EjkI2WV5MXMQw4N8YxFdjOKiiIUHgDR1krB_Hwhhc_DLvrTDhukL29cOr8wDmGy6ArGYqKruoZrbCRJOCcyloZITei8iAT2CMxE0blRruyKhrZWOFjoSS9YmjZMoIRzlVE0MvmJbgjXrC0hDTfh4dkmaLMZpeoCGXjrDpJRY8OEIvxjIpuGWEW05xoHTel3CHoetwtYaLI-ZudHVblkZYGLnQNvK78Okd16M0xOpEsdUPq__rj3uyU23OFLm_J7ytmcQ9HTEtyByKk7yXirtyR-EIib60nSVRx4wRkOQfVdt20V4UcJxAExL9SdBtcH-njxHbH5cJ7UrQKbLmqLjVTwDP6ENxTFYeMqzy-LzGD88iy5rhyFQs0OsiRdMiQOABiPZdoepefePfPEGny6I6eDivpaMtFBd1a4WfLc3O5AwD4lPU2YOWDhGx-hhZqRzcQxBh4ol2ZSPDum1XNBbnDpKLJ-VnGZh-UjaXUK9hIRLV7Z5LHTiPhu3HZEIHeN-4xcN02bmdCKMTDrTKywnGa-LsjbqibUq5C3ujDnlQy684kl_4ffgudOv4nnqPrOsjjtUBb_1I8w5ONADHtvN4zY0QburApO2enHnO-mDkapE7BxmxYGPh-WFQqlS9qSBWRtqMrAk-AgysJO14ib0TA2ddwoX73TFVePYDEeSRrnDqnjiK88jFxtQ95RBqxTELoVAHnbWtTyGDHKL5VtR82ALduB2ocQuOIVrB3nkygulJcoDKo3GOPuQN3El7fqL1gPruwwlDy0WwcwvWKxbi14alQeViYDQidfuxO_oTYexwhsAT0x0dIgfCOFgKJPl42zvq7kkD53Xkl5OXPNVwpkD0YB4_25_CAS-6Hs63eXpW7EmEta91NJe9j28zKkHKVqkWrv4d4bIsiTQEZ6UxvZjc1FP-dVasMZNmw3rpJzJkR6t7MUmEEPxz0UnV7ua15FyJeQ5l9dMtGn-FzXNo-sIV6lo7vN5wEOqGKIoG0Z-JS-R0dS2Luwt73oij9VVOQuMx3ymh09Xv1_mqEzQ0F7j9RtadkuVzUe6A5QGKK2OLts6xE6uE6gyBQ3xw3ul0Czm6JjUWToUS
TEMjZCvoZRHrFCUg5YZ-qxi97JonkA1AIZww6H4goyEj7BHGEBT4QGtlDYd4UluAPoxfNemjc3x95whpT8-LSkBUfXSQ79JBwvpYG2C0Z2cF4G6CGsr1JunnqSqjzmL1U1VuNEcGKm0uOAr1faSpoGBUwh-5qfSJhm7ZCRZqZ9M2Yd61U_h06DOOAAoNFADpAKwGYDMAvlty0PIEZQMEFSdswaTvGiQN6o3NmW7W-spu3WrE_r5efnSKftLT481_yfPR3X9097_e3V_J9Ov_Rct_raw4SFuRjkEtC2x7oa34h2XsUKy8Z95-26X1_n8fdcPvf_J8o2genawbFC-e8x9O9Uu9w_Wc5h82zSU_3nxafTOvXTZBNxW4yHi_KdudTbmW895QwsVvCYn_t_oNdEneNW2P3PIrJWLyozNeyOTjkJwk9Ksxko046Zva_HGLfy2B_0I3f5Hj36GcffE2GBUAt0rbG6VruADOhe-rDv1HmftR5v6szE3i6Rn4e-vcO75N-SbdsDsrrRLbP0Crv05ffNQS9rSGKBekOP1mJPwk5K7Xajv9_cgOCuU-nwMLDNDI8BF1uv23ezr7xT0NNQAsV2myuau2VKzSbJnwOBF5uiyKTb5YL5eb9TqhBUvX_M7tn9lCSOF3Y-IQHqhSCpF1J7c0pjRepTGNF0mSzTflItnEYsPW6yKN1ynU4aJmUp1-03Knt06lvN8ZQCpp7PkHL3fM4BdxQjhxsD7rbdXqLRN9zponc-dkb53u_wEIkf2R">