<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/78506>78506</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
clang generates suboptimal code for code with lots of temporary variables
</td>
</tr>
<tr>
<th>Labels</th>
<td>
clang
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
gbaraldi
</td>
</tr>
</table>
<pre>
I started a discussion on Discourse, but I think this might be worthy of an issue. I know the code looks strange, but it's a C reproducer of what Julia generates.
```c
#include <stddef.h>
// Aggregate wrapping a fixed array of 30 ints; foo() takes it by pointer and returns it by value.
struct wtf {
int a[30];
};
// Reproducer kernel: builds a struct wtf whose element k is b->a[(k + i) % 29]
// for k = 0..29, written fully unrolled with one temporary per index and per value.
// b: source struct, only read through. i: offset added to every index before the modulo.
// Returns the filled struct by value. The noinline attribute keeps the compiler from
// inlining this function into its callers.
// NOTE(review): a sufficiently negative i makes (k + i) % 29 negative, so the read would
// land before b->a; the main() in this file only passes values 0..29.
struct wtf __attribute__ ((noinline)) foo(struct wtf *b, int i){
struct wtf new; // every element 0..29 is assigned below before the return
// Step 1: the 30 source indices. The modulus is 29, so no index exceeds 28
// and b->a[29] is never read.
int idx0 = (0 + i) % 29;
int idx1 = (1 + i) % 29;
int idx2 = (2 + i) % 29;
int idx3 = (3 + i) % 29;
int idx4 = (4 + i) % 29;
int idx5 = (5 + i) % 29;
int idx6 = (6 + i) % 29;
int idx7 = (7 + i) % 29;
int idx8 = (8 + i) % 29;
int idx9 = (9 + i) % 29;
int idx10 = (10 + i) % 29;
int idx11 = (11 + i) % 29;
int idx12 = (12 + i) % 29;
int idx13 = (13 + i) % 29;
int idx14 = (14 + i) % 29;
int idx15 = (15 + i) % 29;
int idx16 = (16 + i) % 29;
int idx17 = (17 + i) % 29;
int idx18 = (18 + i) % 29;
int idx19 = (19 + i) % 29;
int idx20 = (20 + i) % 29;
int idx21 = (21 + i) % 29;
int idx22 = (22 + i) % 29;
int idx23 = (23 + i) % 29;
int idx24 = (24 + i) % 29;
int idx25 = (25 + i) % 29;
int idx26 = (26 + i) % 29;
int idx27 = (27 + i) % 29;
int idx28 = (28 + i) % 29;
int idx29 = (29 + i) % 29; // NOTE(review): same value as idx0 for non-negative i (barring overflow); presumably intentional, confirm
// Step 2: load every source element into its own temporary.
int val0 = b->a[idx0];
int val1 = b->a[idx1];
int val2 = b->a[idx2];
int val3 = b->a[idx3];
int val4 = b->a[idx4];
int val5 = b->a[idx5];
int val6 = b->a[idx6];
int val7 = b->a[idx7];
int val8 = b->a[idx8];
int val9 = b->a[idx9];
int val10 = b->a[idx10];
int val11 = b->a[idx11];
int val12 = b->a[idx12];
int val13 = b->a[idx13];
int val14 = b->a[idx14];
int val15 = b->a[idx15];
int val16 = b->a[idx16];
int val17 = b->a[idx17];
int val18 = b->a[idx18];
int val19 = b->a[idx19];
int val20 = b->a[idx20];
int val21 = b->a[idx21];
int val22 = b->a[idx22];
int val23 = b->a[idx23];
int val24 = b->a[idx24];
int val25 = b->a[idx25];
int val26 = b->a[idx26];
int val27 = b->a[idx27];
int val28 = b->a[idx28];
int val29 = b->a[idx29];
// Step 3: store the temporaries into consecutive slots of the result.
new.a[0] = val0;
new.a[1] = val1;
new.a[2] = val2;
new.a[3] = val3;
new.a[4] = val4;
new.a[5] = val5;
new.a[6] = val6;
new.a[7] = val7;
new.a[8] = val8;
new.a[9] = val9;
new.a[10] = val10;
new.a[11] = val11;
new.a[12] = val12;
new.a[13] = val13;
new.a[14] = val14;
new.a[15] = val15;
new.a[16] = val16;
new.a[17] = val17;
new.a[18] = val18;
new.a[19] = val19;
new.a[20] = val20;
new.a[21] = val21;
new.a[22] = val22;
new.a[23] = val23;
new.a[24] = val24;
new.a[25] = val25;
new.a[26] = val26;
new.a[27] = val27;
new.a[28] = val28;
new.a[29] = val29;
return new;
}
#include <stddef.h>
#include <stdio.h>
#include <time.h>
// Destination for the value returned by foo().
// NOTE(review): volatile is presumably here so the stores in the timing loop are not optimized away; confirm.
volatile struct wtf result;
// Benchmark driver: fills b.a[k] = k, calls foo() num_iterations times with i % 30
// as the offset, and prints the elapsed processor time measured with clock().
int main() {
struct wtf b;
for (int i = 0; i < 30; i++) {
b.a[i] = i;
}
const int num_iterations = 100000000; // Number of times to call foo
clock_t start, end;
double cpu_time_used;
start = clock();
for (int i = 0; i < num_iterations; i++) {
result = foo(&b, i % 30);
// Optionally use result to prevent the compiler from optimizing out the function call
}
end = clock();
// clock() ticks converted to seconds.
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("Time taken: %f seconds\n", cpu_time_used);
return 0;
}
```
This code, when compiled with gcc 12, is about 40% faster than what clang-17 generates, and when inlining is allowed it becomes around 60% faster. A quick peek at the assembly shows very different code: gcc uses mostly xmm registers and AVX instructions, while clang uses mostly general-purpose registers.
https://godbolt.org/z/nWrKKe7s9
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJyMmFFvqzgWxz-N83LUCh9CgIc83Ka30tVd7ax2RtrHyICTeAp21jbN7Xz6kSEQJ5gOUZUm8d_n4P_PNvgwY8RRcr4lyQtJXlestSelt8eCaVZXYlWo6nP7A4xl2vIKGFTClK0xQkkQsvumWm04FK2FH2BPQr67dwONOJ4sFBwuStvTJ6gDMAnCmJYT3MEPeJfqAvbEoVQVh1qpdwPGaiaPfTRhCaYGGOxA87NWVVty7cJcTszCn20tGBy55JpZbp5J9Eqib2QT9X_l9TvGQpZ1W3Eg8c7YquKH5xOJv1-bu3djdVtauNgDkPSl_w0AQEgLjCQvcUSSVxJfW0jqfX7sv98za7UoWsv3eyCYEcykErIWkhPMCeYAB6UIZn5S_FY4S1w-4VT-RXg6yS8Pme-vVVS_IiDxq8sbAcGXLhoQTADzsecgpYOUzktvgXFQ4xJ1PKjjJer1oF4vUSeDOlmi3gzqzRJ1OqjTf7QvG6TZksD5oM6XqOmIkX7B0dPfWC6CSUeadBFOOvKki4DSkSj9AukgHoHSRUTpiJQuYkpHqPQLqp5-JEsXoaUjW7oILo5wcRFcHOHispV6W6qL4OIIF7-AO4hHsrhoseIIFxfBxREuLoKLI1xcBBdHuLgILo5wcR7ufa8PVveEiycSf3f3D7cv-3cQT0knSjqjxIkS75RXWTyRxTMB1xPlekaZTJTJjHIzUW5mlOlEmYaGk01k2UzAfKLM5zyf4qGzfAKA5gjRKSKKc9opJzoHik5J0TlUdMqKzsGiU1p0Dhed8qLpnHYKjc5Ro1NsdI4bTrnhHDeccsPZlRVYWnPccMoN57jhlBvOccMpN5zjhlNuOMcNp9xwjhtOueEcN5xywwk3yS_PrtFB6uRua7wphmbqNdNgAPQUGFTEniKeplh7zetggMRTJEHFxlNspilSrzkNBsg8RRZU5J4iDxjlG0mjYAh652bYTur7STGQyLeTxuEgvqc0bCr1XaVJIJFvKt2Eg_jO0rC11PeWZoFEvrU0D88y314M24u-vUinifBuroYnK_r2Ythe9O3FdSCR7y2Gpyz69mLYXvTtxTSQyPcWwxMXfXsfnqQ0t62W94fX9PXuFPsPh_THZqFmW61o-OR8_6FqZkXN_bO05qat7cMjnNvcGiZkd27PYe4cXtyN76C0ez7sHhg7C9zM6T7uIO4_E3zp_h5CulfR-ScG-8Qt9INJTlwqaWy3Bcu22QvLNbNCSdN1pdH15VISfCP4Bv9um6KvmThjDFgFJavrrgRxi1qr8n1v-xoPwR1wWd2NsFJtUXMoz-3ehdm3hleBh9-uf3cpXcTexIDuK8PuxzVrXk-v69xXUwhu-hJK92weR-RhErrX1ZPfzi42q-tPaA0fQlkFZ80_uLTXklRzFjXXcNCqAXW2ohF_CXkE1faCQytLF6fz85YnAI3LataUzn7f1uG0QTDrXe-PG5kL8jQQyvsf32D3r992P3_f_-f7f_e_f98FrD5rIe2hi4d_iIaDZe9ckvibs-kAhpdKVoYkO0kQnX_3jIP4rus5Cqzmofjmd_njJExf4bucuByMreAi7AmOZQkUQRhghXN2HTl8B2Ys12BPTPa1vrJm8vhE01u5z10rkxUUn8DqWl0cGtFR7Gtt7kvBS-UmPdOqlRVsvNhdd_h_K8p3OHP2DqynyozhTVELbsCc1MUAgw-uP6EShwPXbnKcBS-5W1BuSC7MOI7WuItolLH1J_xqGtD8KLqBuAtlH79AyH4T6Zbs5eR2pG5kbh6aoadUumH12HkobJ6sPRsSf-sn8VFVharts9JHgm9_EXyT_9M_f_LU5KtqG1d5nLMV39I0ShLM1tF6ddpmGTtskmKz4SnNkijZFBVbRzyvDus8TTd8JbYY4TqiNEWaxLh-LuJ8kyCmxSGi1ZplZB3x
hon6ua4_Gpd71VVzt2mWRJtVzQpem66OjNiNi3Q3w5XeOv1T0R4NWUe1MNbcIlhha77tbRjpQiu7Jcfqfua4HaOfQs7sWlnTbWm8OSvN9Cd8MC1YUXOzanW9fbBK2FNbPJeqIfjm0l7_PZ21-pOXluBbNwpD8K0byN8BAAD__-JTCAY">