<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/82213>82213</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AArch64][SVE] Clang cannot vectorize TSVC s1161, but GCC can
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
m-saito-fj
</td>
</tr>
</table>
<pre>
Clang cannot vectorize TSVC s1161 for SVE, but GCC 13.2.0 can.
Option:
`-Ofast -march=armv8.2-a+sve`
```c
/* Element count of each 1-D array (a, b, c, d, e). */
#define LEN 32000
/* Extent of each dimension of the square 2-D arrays (aa, bb, cc, dd). */
#define LEN2 256
/* Number of times s1161 repeats its inner loop. */
static int ntimes = 200000;
/* 1-D global arrays: s1161 reads c, d and e, reads and writes a, and writes b. */
float a[LEN], b[LEN], c[LEN], d[LEN], e[LEN];
/* 2-D global arrays: s1161 only forwards aa, bb and cc to dummy(); dd is not used by it. */
float aa[LEN2][LEN2], bb[LEN2][LEN2], cc[LEN2][LEN2], dd[LEN2][LEN2];
/*
 * Declared only; no definition is shown in this report.
 * NOTE(review): presumably an opaque external call that keeps the compiler
 * from discarding the loop's stores -- confirm against the TSVC sources.
 */
int dummy(float[LEN], float[LEN], float[LEN], float[LEN], float[LEN],
float[LEN2][LEN2], float[LEN2][LEN2], float[LEN2][LEN2], float);
/*
 * TSVC kernel s1161 as quoted in this report. For each i in [0, LEN-1):
 *   - when c[i] is less than zero:  b[i] = a[i] + d[i] * d[i]
 *   - otherwise:                    a[i] = c[i] + d[i] * e[i]
 * The inner loop is repeated ntimes times, with one call to dummy() after
 * each pass. Always returns 0.
 *
 * The goto-based shape is kept as is: each iteration stores to either a[i]
 * or b[i], never both.
 */
int s1161()
{
for (int nl = 0; nl < ntimes; nl++) {
/* Upper bound is LEN-1, so the last element of each array is never touched. */
for (int i = 0; i < LEN-1; ++i) {
if (c[i] < (float)0.) {
goto L20;
}
/* c[i] is not less than zero: update a[i], then skip the b[i] store. */
a[i] = c[i] + d[i] * e[i];
goto L10;
L20:
/* c[i] is less than zero: update b[i] instead of a[i]. */
b[i] = a[i] + d[i] * d[i];
L10:
;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
return 0;
}
```
See also (Clang vs GCC):
https://godbolt.org/z/EEq8Mj5zs
GCC result:
```asm
.L2:
ld1w z2.s, p0/z, [x27, x5, lsl 2]
lsl x0, x5, 2
fcmge p1.s, p2/z, z2.s, #0.0
ld1w z1.s, p0/z, [x26, x5, lsl 2]
add x6, x25, x0
add x7, x20, x0
ld1w z3.s, p0/z, [x6]
ld1w z5.s, p0/z, [x7]
add x0, x24, x0
sel z4.s, p1, z2.s, z3.s
ld1w z0.s, p0/z, [x0]
fmla z4.s, p1/m, z1.s, z5.s
fcmlt p1.s, p2/z, z2.s, #0.0
movprfx z0.s, p1/m, z3.s
fmla z0.s, p1/m, z1.s, z1.s
st1w z4.s, p0, [x6]
st1w z0.s, p0, [x0]
add x5, x5, x28
whilelo p0.s, w5, w19
b.any .L2
```
Loop Body IR:
```llvm
for.body4: ; preds = %for.cond1.preheader, %for.inc
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
%arrayidx = getelementptr inbounds [32000 x float], ptr @c, i64 0, i64 %indvars.iv, !dbg !21
%0 = load float, ptr %arrayidx, align 4, !dbg !21, !tbaa !22
%cmp5 = fcmp fast olt float %0, 0.000000e+00, !dbg !26
br i1 %cmp5, label %L20, label %if.end, !dbg !21
if.end: ; preds = %for.body4
%arrayidx9 = getelementptr inbounds [32000 x float], ptr @d, i64 0, i64 %indvars.iv, !dbg !27
%1 = load float, ptr %arrayidx9, align 4, !dbg !27, !tbaa !22
%arrayidx11 = getelementptr inbounds [32000 x float], ptr @e, i64 0, i64 %indvars.iv, !dbg !28
%2 = load float, ptr %arrayidx11, align 4, !dbg !28, !tbaa !22
%mul = fmul fast float %2, %1, !dbg !29
%add = fadd fast float %mul, %0, !dbg !30
br label %for.inc, !dbg !31
L20: ; preds = %for.body4
%arrayidx15 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %indvars.iv, !dbg !32
%3 = load float, ptr %arrayidx15, align 4, !dbg !32, !tbaa !22
%arrayidx17 = getelementptr inbounds [32000 x float], ptr @d, i64 0, i64 %indvars.iv, !dbg !33
%4 = load float, ptr %arrayidx17, align 4, !dbg !33, !tbaa !22
%mul20 = fmul fast float %4, %4, !dbg !34
%add21 = fadd fast float %mul20, %3, !dbg !35
br label %for.inc, !dbg !36
for.inc: ; preds = %if.end, %L20
%a.sink = phi ptr [ @a, %if.end ], [ @b, %L20 ]
%add.sink = phi float [ %add, %if.end ], [ %add21, %L20 ]
%arrayidx13 = getelementptr inbounds [32000 x float], ptr %a.sink, i64 0, i64 %indvars.iv, !dbg !37
store float %add.sink, ptr %arrayidx13, align 4, !dbg !37, !tbaa !22
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !38
%exitcond.not = icmp eq i64 %indvars.iv.next, 31999, !dbg !39
br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4, !dbg !13, !llvm.loop !40
```
`-mllvm -debug-only=loop-accesses` messages:
```
LAA: Can't find bounds for ptr: %arrayidx13 = getelementptr inbounds [32000 x float], ptr %a.sink, i64 0, i64 %indvars.iv, !dbg !37
LAA: Found a runtime check ptr: %arrayidx15 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %indvars.iv, !dbg !32
LAA: We need to do 0 pointer comparisons.
LAA: We can't vectorize because we can't find the array bounds.
```
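The direct cause is that LoopAccessAnalysis (LAA) cannot compute the bounds of the store pointer "%arrayidx13".
That pointer is based on the phi "%a.sink" in the "for.inc" block, which selects the store's base address (@a or @b), and this phi appears to be what prevents LAA from finding the array bounds.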
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzEWF-P4jgS_zTmxSJyKgTIAw8NNKuT-mZPN6u5Zyd2wDNJzMamm-5Pf_KfkD8kvd1zp9kIQRyXf1Xl-lW5CFVKHCvONyjeong_oxd9kvWmnCsqtJzn32epZK-bXUGrI85oVUmNv357xM8807IWbxz_8fXbDqswXIYIdji9aPzbbhdGAQTELAgQ2SPy4L5_P2shKxT5IVqS-e85VRrPS1pnJxTtaV0-rwOYUwRb9czRknTXm6H9ZH4MEeO5qDh-evyCIyCEjEwAhnjpnitNtciwqDSutCi5wijaY7OOEBRtu7ryQlKNKYq3T49fULy33vVGWW_EeiPejhpYD-gRwUy1dwY8nZrJsqkZxkZn-p4Yb9mlLF8RrK0RPUv_T0-cKny7OgL3Zv-vk5CMuOg5uDazbmq17VuVyxojWNvoFzbyJujufucJ4R4g2NpPgu9ARsBEiyUs1NPjl3lohg5HvAvUXCI3eCbUAsV7i3MLGCQk6IMcpZb4CVrWTsGi1d4L0BZ6j1s9sLXc9YMHS13RZdEUsjMhbE2w5viI2Fy5aaOT2thQmwVsQCZtaL0aXA3RqctXm6Y2O21Sws4koEs2l1iww3Zr7zTdaai5vtQV7tSJRuJWltzwK-eYFkqa8LnC-axMUbRavGcnrc_KjOCA4HCULJWFDmR9RHB4Q3B4fPxz_c_v8Zvqsvy33Q7XXF0K3S2h7kNV6Z4ET9Dun78KFr6Y3zcIlPH4TJyaHUbx9gorc3eNzXehCmxzza03Q3NdSSsCg6TKyiPHGJ9Djw0NdqMNQUQC0iA2poSjpiwnTfEXZcxZ5CTBil4b9NuscwlIb3a4G9GYCcs7lVb-LR4TXk3b51TDYswCxd22vi08aNjdL2vXuMVkzAhyZ0ReFnSIfygttt92684wjoX-eBz9Vcrnc51fW8taTXde3Ky6l22sCodrlPaeL1rPp-J0kyVD2fsduoUpbgl3hbUXejmJghcSnz3Si51_CZM-SBrQ6hVjbFJurBD4giblGW8le8X_-Pd93hbFs0_cXNaBabYWKJosucPLnDHnmjPXxyCIDUgmKxYG55qfOGW8dqGzM6LKGg8QxKJiz7RWgXi2q88ngcVyYTYMk86iARz2J7ER64EEFb_qvjLc2XcEMa1r-irY1ao7cs0LXvJKn3WNRZXKS2X8iLe2j8PXphGw2owMWhBbr42RpLnpmeC0hyw9mh8IO7qJVVpIypoewoO2ZtnToRDHCi_ugNxYp5TaB00NRBBn5Tm22HlWnrHtZmWhnRKr2B0xtsUkHMGWkAH6sjEzrbEIG0xbAmnKC_PgyVWy21jkAa_YuLu-I3IS0cM4RxzR7kOT_Hxs2Kdis-ooD_86Nsl0cFZTwemuD8Ofd4x_yrF1Rzv8tWNhOO3Z-h3alRfXwebmxrLuxjjwSRgO4JLuvjDmlpub_vLyUniAAVEj0iHqjYpNXemL9rjo-sLPETGMfz5g9DMBi7p0iT4QsHgyYBF8iIqrX5RjUdTRvviAZ6tpz6L3qQhkioweKB4CLvpkhPAdOkJzHEUDkPjjhFz2_tt7kTFSdkurrbsdOwMlqh-3w9JunzkFPeNui3tH5IKkLdjwQGSsD-m9dkcrZWwa1u_aNHQT1Ohn2db4-ynK3cq60rLmbRQbT8doF03TbrK233ce7n8mY7i6vOBKvYybOqiJ0brF41ehTa8TVNKBCXOg8z_vgW6dThQmSTJATO5O8y5w7xRvuqsgKzitLufobtZVx56CsEkC0zkGhWkuEYQL8k4HipZkXhpxPGc8vRznsipeUbQ3i-c0y7hSXKElwSVXih65um9TfSF_eDA5s6MVgpXGuagY9kzKZW0Ca3vXv51-3s6D0YIpri_23Q7OTjz7MWrlrz5svIH_4bjinGEtMZOY4LMUleY1zmR5prVQslLBcEHm
N799A5vyjF4Uxy_tpI2MPnFsPfQxCkZj-seJYyZqnmmcUwOJhbJLS6GUqI7WTwtA61cP8S9TqmRt_sfyTBuhlCpusq_mSmGZ-_QXFUYAnVCCmUMAtxoNOC1k9gMrzktl9iHlmOY5zzRnvVfHM7aJWBIldMY34YqsI4gispydNoStVos4yRY55WtCV7DgJIMUIGFZAvliJjZAYEEgTMgyjMkyWGYZ5CyDbBVDSJdL0-KVVBSBzShZH2dCqQvfrAHCaGbzUdn34wAVf8F20rgV72f1xqyZp5ejQgtSCKVVi6KFLuyL9YeHOjstF-5l5tdvjyjemxwydSblbSBttfcv0E0k25kAwbr7nj2ZXepiM3iHJPTpkgaZLBEc7L9K9zM_1_I7zzSCg7VcIThYz_4bAAD__wYxYdU">