[llvm-dev] Vectorization width not correct using #pragma clang loop vectorize_width
hameeza ahmed via llvm-dev
llvm-dev at lists.llvm.org
Thu Sep 20 14:15:55 PDT 2018
Hello,
I m trying to set vector width using #pragma clang loop vectorize_width(32)
but i m getting width 8 for the following kernel;
#define M 128
#define N 128
#define SQRT_FUN(x) sqrtf(x)
int main(int argc, char** argv)
{
/* Variable declaration/allocation. */
double float_n = (double)N;
double data[N*M];
double corr[M*M];
double mean[M];
double stddev[M];
uint32_t i,j,k;
/*Initialize array(s). */
#pragma clang loop vectorize_width(1) //no vectorize
for (i = 0; i < N*M; i++)
{
data[i] = (50.0)*i;
}
kernel_1:
#pragma clang loop vectorize_width(32)
for (j = 0; j < M; j++)
{
mean[j] = 0.0;
}
for (i = 0; i < N; i++)
{
for (j = 0; j < M; j++)
mean[j] += data[(i*M) + j];
}
for (j = 0; j < M; j++)
{
mean[j] /= float_n;
}
kernel_2:
for (j = 0; j < M; j++)
{
stddev[j] = 0.0;
}
for (i = 0; i < N; i++)
{
for (j = 0; j < M; j++)
{
stddev[j] += (data[(i*M) + j] - mean[j]) * (data[(i*M)+j] -
mean[j]);
}
}
for (j = 0; j < M; j++)
{
stddev[j] /= float_n;
}
for (j = 0; j < M; j++)
{
stddev[j] = SQRT_FUN(stddev[j]);
}
kernel_3:
for (i = 0; i < N; i++)
{
for (j = 0; j < M; j++)
{
data[(i*M) + j] -= mean[j];
}
}
for (i = 0; i < N; i++)
{
for (j = 0; j < M; j++)
{
data[(i*M) + j] /= SQRT_FUN(float_n) * stddev[j];
}
}
kernel_4:
for (i = 0; i < M*M; i++)
{
corr[i] = 0.0;
}
for (k = 0; k < N; k++)
{
for (i = 0; i < M-1; i++)
{
for (j = i+1; j < M; j++)
{
corr[(i*M)+j] += (data[(k*M)+i] *
data[(k*M)+j]);
}
}
}
printf("Corr[0]: %lf\n",mean[0]);
printf("Corr[0]: %lf\n",mean[M-1]);
printf("Corr[0]: %lf\n",stddev[0]);
printf("Corr[0]: %lf\n",stddev[M-1]);
printf("Corr[0]: %lf\n",corr[0]);
printf("Corr[(M*M)-1]: %lf\n",corr[(M*M)-1]);
printf("Corr[0]: %lf\n",data[0]);
printf("Corr[(M*M)-1]: %lf\n",data[(M*M)-1]);
return 0;
}
*i m getting following output when i compiled;*
*clang -O3 correlation.c -Rpass=loop-vectorize -emit-llvm -march=knl
-S -o 1.llcorrelation.c:38:9: remark: vectorized loop (vectorization
width: 8, interleaved count: 4) [-Rpass=loop-vectorize] for (j = 0;
j < M; j++) ^correlation.c:41:5: remark: vectorized loop
(vectorization width: 8, interleaved count: 4) [-Rpass=loop-vectorize]
for (j = 0; j < M; j++) ^correlation.c:53:9: remark: vectorized loop
(vectorization width: 8, interleaved count: 4)
[-Rpass=loop-vectorize] for (j = 0; j < M; j++)
^correlation.c:58:5: remark: vectorized loop (vectorization width: 8,
interleaved count: 4) [-Rpass=loop-vectorize] for (j = 0; j < M; j++)
^correlation.c:71:9: remark: vectorized loop (vectorization width: 8,
interleaved count: 4) [-Rpass=loop-vectorize] for (j = 0; j < M;
j++) ^correlation.c:78:9: remark: vectorized loop (vectorization
width: 8, interleaved count: 4) [-Rpass=loop-vectorize] for (j = 0;
j < M; j++) ^correlation.c:98:13: remark: vectorized loop
(vectorization width: 8, interleaved count: 4)
[-Rpass=loop-vectorize] for (j = i+1; j < M; j++)*
*why is that so?*
*although i m able to set width to 32 of the example code given on site.*
*Why Pragmas are not setting vector width correctly here in my kernel?*
*What is the issue?*
*Please help..*
*Thank You*
*Regards*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20180921/a072e1c9/attachment.html>
More information about the llvm-dev
mailing list