<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/57776">57776</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Loop unroll pass creates 2 (near) identical loop blocks
</td>
</tr>
<tr>
<th>Labels</th>
<td>
loopoptim
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
RKSimon
</td>
</tr>
</table>
<pre>
https://godbolt.org/z/d9aEM1P1r
```
using T = long long;
const int size = 58;
T a[size], b[size], c[size];
void test () {
for (int i = 0; i < size; i++) {
a[i] = b[i] << c[i];
}
}
```
clang -g0 -O2 -march=sandybridge
```
; ModuleID = '/app/example.cpp'
@a = dso_local local_unnamed_addr global [58 x i64] zeroinitializer, align 16
@b = dso_local local_unnamed_addr global [58 x i64] zeroinitializer, align 16
@c = dso_local local_unnamed_addr global [58 x i64] zeroinitializer, align 16
; Function Attrs: mustprogress nofree norecurse nosync nounwind uwtable
define dso_local void @_Z4testv() local_unnamed_addr #0 {
entry:
br label %vector.body
vector.body: ; preds = %vector.body.1, %entry
%index = phi i64 [ 0, %entry ], [ %index.next.1, %vector.body.1 ]
%0 = getelementptr inbounds [58 x i64], ptr @b, i64 0, i64 %index
%wide.load = load <2 x i64>, ptr %0, align 16, !tbaa !6
%1 = getelementptr inbounds [58 x i64], ptr @c, i64 0, i64 %index
%wide.load11 = load <2 x i64>, ptr %1, align 16, !tbaa !6
%2 = shl <2 x i64> %wide.load, %wide.load11
%3 = getelementptr inbounds [58 x i64], ptr @a, i64 0, i64 %index
store <2 x i64> %2, ptr %3, align 16, !tbaa !6
%index.next = or i64 %index, 2
%4 = icmp eq i64 %index.next, 58
br i1 %4, label %for.cond.cleanup, label %vector.body.1, !llvm.loop !10
vector.body.1: ; preds = %vector.body
%5 = getelementptr inbounds [58 x i64], ptr @b, i64 0, i64 %index.next
%wide.load.1 = load <2 x i64>, ptr %5, align 16, !tbaa !6
%6 = getelementptr inbounds [58 x i64], ptr @c, i64 0, i64 %index.next
%wide.load11.1 = load <2 x i64>, ptr %6, align 16, !tbaa !6
%7 = shl <2 x i64> %wide.load.1, %wide.load11.1
%8 = getelementptr inbounds [58 x i64], ptr @a, i64 0, i64 %index.next
store <2 x i64> %7, ptr %8, align 16, !tbaa !6
%index.next.1 = add nuw nsw i64 %index, 4
br label %vector.body
for.cond.cleanup: ; preds = %vector.body
ret void
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sandybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" }
```
```
test(): # @test()
# %bb.0: # %entry
movl $2, %eax
leaq b(%rip), %rcx
leaq c(%rip), %rdx
leaq a(%rip), %rsi
.LBB0_1: # %vector.body
# =>This Inner Loop Header: Depth=1
vmovdqa -16(%rcx,%rax,8), %xmm0
vmovdqa -16(%rdx,%rax,8), %xmm1
vpsllq %xmm1, %xmm0, %xmm2
vpshufd $238, %xmm1, %xmm1 # xmm1 = xmm1[2,3,2,3]
vpsllq %xmm1, %xmm0, %xmm0
vpblendw $240, %xmm0, %xmm2, %xmm0 # xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
vmovdqa %xmm0, -16(%rsi,%rax,8)
cmpq $58, %rax
je .LBB0_3
# %bb.2: # %vector.body.1
# in Loop: Header=BB0_1 Depth=1
vmovdqa (%rcx,%rax,8), %xmm0
vmovdqa (%rdx,%rax,8), %xmm1
vpsllq %xmm1, %xmm0, %xmm2
vpshufd $238, %xmm1, %xmm1 # xmm1 = xmm1[2,3,2,3]
vpsllq %xmm1, %xmm0, %xmm0
vpblendw $240, %xmm0, %xmm2, %xmm0 # xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
vmovdqa %xmm0, (%rsi,%rax,8)
addq $4, %rax
jmp .LBB0_1
.LBB0_3: # %for.cond.cleanup
retq
# -- End function
a:
.zero 464
b:
.zero 464
c:
.zero 464
```
Prior to the loop-unroll pass, we had the more useful IR:
```
@a = dso_local local_unnamed_addr global [58 x i64] zeroinitializer, align 16
@b = dso_local local_unnamed_addr global [58 x i64] zeroinitializer, align 16
@c = dso_local local_unnamed_addr global [58 x i64] zeroinitializer, align 16
define dso_local void @test() local_unnamed_addr {
entry:
br label %vector.body
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds [58 x i64], ptr @b, i64 0, i64 %index
%wide.load = load <2 x i64>, ptr %0, align 16
%1 = getelementptr inbounds [58 x i64], ptr @c, i64 0, i64 %index
%wide.load11 = load <2 x i64>, ptr %1, align 16
%2 = shl <2 x i64> %wide.load, %wide.load11
%3 = getelementptr inbounds [58 x i64], ptr @a, i64 0, i64 %index
store <2 x i64> %2, ptr %3, align 16
%index.next = add nuw i64 %index, 2
%4 = icmp eq i64 %index.next, 58
br i1 %4, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body
ret void
}
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJztWUtv2zgQ_jXyhbChh_XIwYc6abHFtthit6e9BJRE22wlSiEpJ-2v3xmKlijHbtxsCnQXNVJpKA7nQX7DGbJ5U35Z7bRulRe98sI38Ldtyryp9KKRW2h9hX_lFX39PvgQSM-_8fxX9pn49s80O8XFlnwkXnRDqgZIfHjR2h1SNEJpwoUmin9lhjXOBp6PhHrxGnu8-MYLr0k-bRZOcyp33_CSaAayvTDzwivipbafwG_TSPyOarnR6cNwQ14bO0zLC9fm72gs_tAqDkrN2HxsXKOAwrZRCPzGkV56Y00ciOmEFRUV2_nWJ_M_QjKvqSx2oEBRUX7JJS-37OQo1PO-KbuKve0N8sIUVoi2LTzZA63bii0KbKWTxVr61LCXqrmtmoJWxDxvOyFozcpbWpaSbKsmhx5wKc7IA-HJEj39ymTDBdecVjBbEpcCqK0gQTIIz3-k8OLHCR_m9E0nCs0bQV5pLTEWSN0p3cpmK5lSRDQbyRi8JCs6qZBSX0QBr07cc1GS7l7TvLJrVrINF8yx2AAUPLn9e4kw3VuYnnDECyN_hCATWn7BwLSwyiWpaM7A0TDes0I3cpFDAE9iwfkMXqBvrWSlsmBxhy0CnBH41quxOqANDrEHM6DdcZxMnFgIG4eb2KDEjsOIhWAPehA6UWTYRwW-Eb5lmlWsBnmtlrAt5DCZaOhkEVEadiPIkEZr_ANx0OyIvuclW1QNLe1OZIjr0MqLXg_ywIoJIIzVgc4pxXfiiAyeZW3xXdYGwdP2BhfaGxpRaldNJU302UVy9DsComc5TJ90WAEg2GOjQsfH6EIfR8AZW2GLnyiEgaHDvTRMvKhbwu4mnEYEskMiGoOMB2YUfh_iDdLIAhJYuSgqRkXXTjpPRFVQVfsa5rZpsRH4Z4IU2J8IU8eP-IXjpvf-BBwXF8AxvnCpkhcOn7NGB8ElZicXmp1eEEXDZjexwRGSvXAkTXw_E06p42z23eFkpxCSERHdPRHq_lFkLb8jHT2KmgvBLpk2WfO4hjJPCjma5x2kUpsvUUy6fl7KBhHhRkIGnrdQLGgsE0IQCE_RCAYvZKi5mFdsS6t5b-scFlzvBk7fsolmriXUYlAKQ0XnMGjZHUQpTYvPczBT95LybrNhcm4q0QN7Znk1lYCdedF2Q5dbIE6YNozqDjwfOKGepXtcMCAKWUShJR8MEgyV9cTmQcmeqms7oC2quqss3bSF0D2t6G5jKcUGIhyoaKCWJjgO9MAxsDxkqSUU3TOHbFptPDtTNx81sZ7qyykDrTDCAHI-9mPwcxjn-cIfuI7KnrrZI46X4aHKoUPaAuTeQeGP8mLJW5Ta88hiylOc4imnPPQUj-I9z-Ldeu3fBo6NJ_OAyc8Q7R93XJG3QjBJ3mGe-Y3REvALo29Yq_E4MexFe_CvvKNkjoufWdtxzmNJkchGax7q2j8_rPzGsFFbq6rqbvjqyB3I0OHddZvSzH2UucJG0vjcExDqpjNe40ohlvr3WF9eoHz0r4UdQJT3RvvSP2Pp-PVgiH8wJARDkDdwDQmvjYR4jRUEpkkMuNQ10U6ro2ucYQDD8QzbYVC_oF_L-DBNcgTpJ0Z69ETHkA9Pw8nNUxEkJQMhZD2g6MZg8TyUnoOjXyD6oSB6EkGQ1g2ClicABMXxYf9xNqPIQc-jZD4m6zsXTPM5eQ0JdmPP1DZpO-fYBZ7MyTKxlUR-vqs433WUCD5IDqcA3RAsuTshm6oi94zsoBTUOwb7O9RKnWIbyGrR6SusX5ck37gkOXunMebak_cZL3eVcdHvm7Xly992eI_uOv4DNx0_-83G__Em4-zNxeGg9bNcXzx1ePtXAfjE4e6wHc_YKkiSKAqyyPdn5Soqr6IrOtNcV2xlKl27v7cUDnqFhJMPHAXxMJQJRiVuRYAGoXm_EQJ_DlvT
ZzXrZHX8nx1c77ocvKyhgTc29oUHtE9gOTS5Uh0eq97EaZoms90q9JMkjaOULuMgymhapHEQZFnox3HMojiamXlVK8AXHGRQPxxpeI1HsvhmxmE8nBivgsTP4mDpL5YJS2lRXCVBEYZJkgAAWU15tTAXSI3czuTKmJR3WwWdFVdajZ0wBYAxxow6kE87vWvk6s_f_-J1I2bG-pUx_R_QsDgO">