<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/54770>54770</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Unrolling single trivial loop generates 3X slower program on M1 mac
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          namniav
      </td>
    </tr>
</table>

<pre>
    ```c++
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <cassert>

#include <utility>

// Three closed integer intervals, stored as (first, second) = (low, high).
// The cond_* predicates below test whether a value lies inside one of them.
constexpr std::pair<int, int> p[3] = {{-233,-113}, {-91,-23}, {3,37}};

// Using a template for compactness. Using separate functions doesn't change this issue.
// Loop form of the interval test.
// Returns true when x lies inside any of the first N closed intervals of p
// (p[i].first <= x <= p[i].second), false otherwise. The loop always walks
// all three entries; the `i < N` term filters out the ones beyond N.
template<int N>
bool cond_loop(int x) {
  for (int i = 0; i < 3; ++i)
    if (i < N && p[i].first <= x && x <= p[i].second)
      return true;
  return false;
}

// Hand-unrolled form of the same interval test: one parenthesised term per
// entry of p, joined with ||. Each term is guarded by `index < N`, so only
// the first N intervals can make the result true.
template<int N>
bool cond_unroll(int x) {
  return (0 < N && p[0].first <= x && x <= p[0].second) ||
         (1 < N && p[1].first <= x && x <= p[1].second) ||
         (2 < N && p[2].first <= x && x <= p[2].second);
}

// Sums the n elements of data into a long long, choosing per element how it
// is added:
//   - Cond(data[i]) true  -> add the element's signed value;
//   - Cond(data[i]) false -> add `data[i] & 0xff`, i.e. only its low 8 bits.
// Cond is a compile-time callable taking the element value.
// Marked noinline so this loop is not inlined into its caller.
template<auto Cond>
__attribute__((noinline))
long long conditional_sum(signed char* data, int n)
{
  auto sum = 0LL;
  for (int i = 0; i < n; ++i) {
    if (Cond(data[i])) sum += data[i];
    else               sum += data[i] & 0xff;
  }
  return sum;
}

// Timing harness: calls conditional_sum<Cond>(data, n) `repeat` times,
// accumulating the results into a checksum, and prints that checksum together
// with the clock() time spent in the loop, converted to milliseconds.
// Marked noinline so each instantiation stays a separate function.
template<auto Cond>
__attribute__((noinline))
void benchmark(signed char* data, int n, int repeat) {
  auto s = 0LL;
  auto t1 = clock();
  for (int i = 0; i < repeat; ++i) {
    s += conditional_sum<Cond>(data, n);

    // prevent optimization between loops by calling an external function
    snprintf(nullptr, 0, "%p", (void*)data);
  }
  auto t2 = clock();
  // clock() ticks -> milliseconds.
  printf("checksum=%lld time=%lldms\n",
      s, (long long)(t2 - t1) * 1000 / CLOCKS_PER_SEC);
}


// Driver: builds the input buffer, checks that the loop and unrolled
// predicates agree on every element, then times all four variants.
int main()
{
  // Fixed seed, so the same srand() argument is used on every run.
  srand(31415926);
  int n = 32 * 1024;
  // NOTE: this buffer is never deleted; the program exits after the benchmarks.
  auto data = new signed char[n];
  // rand() % 256 is in [0, 255]; subtracting 128 gives values in [-128, 127].
  for (int i = 0; i < n; ++i)
    data[i] = (rand() % 256) - 128;

  // Sanity check: both formulations must give the same answer for N = 2 and N = 3.
  for (int i = 0; i < n; ++i) {
    assert(cond_loop<2>(data[i]) == cond_unroll<2>(data[i]) &&
           cond_loop<3>(data[i]) == cond_unroll<3>(data[i]));
  }

  // Four rounds; each round times every variant with 3000 passes over the buffer.
  for (int i = 0; i < 4; ++i) {
    printf("loop  <2> ");
    benchmark<cond_loop  <2>> (data, n, 3000);
    printf("unroll<2> ");
    benchmark<cond_unroll<2>> (data, n, 3000);
    printf("loop  <3> ");
    benchmark<cond_loop  <3>> (data, n, 3000);
    printf("unroll<3> ");
    benchmark<cond_unroll<3>> (data, n, 3000);
    puts("");
  }
}
```


Output on my M1 MacBook Air (compiled with `clang++ -Wall -Wextra -Werror -std=c++20 -O3 -fno-lto test.cpp`):
```
loop  <2> checksum=4255083000 time=207ms
unroll<2> checksum=4255083000 time=168ms
loop  <3> checksum=4255083000 time=60ms
unroll<3> checksum=4255083000 time=185ms
```

* **Issue 1**: Compared to `cond_unroll<3>`, `cond_loop<3>` makes `conditional_sum` 3X faster!
    I don't see any difference between the two in the source code that would explain this.

* **Issue 2**: Compared to `cond_loop<2>`, `cond_loop<3>` makes `conditional_sum` 3X faster!
    The difference is that `cond_loop<3>` has an additional positive interval. But for a positive `signed char data[i]`, `sum += data[i]` is equivalent to `sum += data[i] & 0xff`. Why does adding a useless positive interval make `conditional_sum` 3X faster?

Note that Clang on my desktop PC (Ubuntu 20.04 running on an Intel CPU) doesn't have this issue, so this might be specific to this platform. Apple's Clang also doesn't have this issue.

**Versions**:
```
❯ clang --version
Homebrew clang version 13.0.1
Target: arm64-apple-darwin21.4.0
Thread model: posix
InstalledDir: /opt/homebrew/opt/llvm/bin
❯ uname -a
Darwin Namniav-M1-Air.local 21.4.0 Darwin Kernel Version 21.4.0: Fri Mar 18 00:47:26 PDT 2022; root:xnu-8020.101.4~15/RELEASE_ARM64_T8101 arm64
```

**`-v` output**:
```
❯ clang++ -Wall -Wextra -Werror -std=c++20 -O3 -fno-lto -v test.cpp
Homebrew clang version 13.0.1
Target: arm64-apple-darwin21.4.0
Thread model: posix
InstalledDir: /opt/homebrew/opt/llvm/bin
 "/opt/homebrew/Cellar/llvm/13.0.1_1/bin/clang-13" -cc1 -triple arm64-apple-macosx12.0.0 -Wundef-prefix=TARGET_OS_ -Werror=undef-prefix -Wdeprecated-objc-isa-usage -Werror=deprecated-objc-isa-usage -emit-obj --mrelax-relocations -disable-free -disable-llvm-verifier -discard-value-names -main-file-name test.cpp -mrelocation-model pic -pic-level 2 -mframe-pointer=non-leaf -fno-rounding-math -munwind-tables -fcompatibility-qualified-id-block-type-checking -fvisibility-inlines-hidden-static-local-var -target-cpu apple-m1 -target-feature +v8.5a -target-feature +fp-armv8 -target-feature +neon -target-feature +crc -target-feature +crypto -target-feature +dotprod -target-feature +fp16fml -target-feature +ras -target-feature +lse -target-feature +rdm -target-feature +rcpc -target-feature +zcm -target-feature +zcz -target-feature +fullfp16 -target-feature +sha2 -target-feature +aes -target-abi darwinpcs -fallow-half-arguments-and-returns -debugger-tuning=lldb -target-linker-version 711 -v -fcoverage-compilation-dir=/Users/namniav/nnspace/test -resource-dir /opt/homebrew/Cellar/llvm/13.0.1_1/lib/clang/13.0.1 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk -stdlib=libc++ -internal-isystem /opt/homebrew/opt/llvm/bin/../include/c++/v1 -internal-isystem /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/usr/local/include -internal-isystem /opt/homebrew/Cellar/llvm/13.0.1_1/lib/clang/13.0.1/include -internal-externc-isystem /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/usr/include -O3 -Wall -Wextra -std=c++20 -fdeprecated-macro -fdebug-compilation-dir=/Users/namniav/nnspace/test -ferror-limit 19 -stack-protector 1 -fblocks -fencode-extended-block-signature -fregister-global-dtors-with-atexit -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -fmax-type-align=16 -fcolor-diagnostics -vectorize-loops -vectorize-slp -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /var/folders/kz/4wgytmy56ks64tslrymmc0vw0000gn/T/test-8916c0.o -x c++ test.cpp
clang -cc1 version 13.0.1 based upon LLVM 13.0.1 default target arm64-apple-darwin21.4.0
ignoring nonexistent directory "/Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/usr/local/include"
ignoring nonexistent directory "/Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/Library/Frameworks"
#include "..." search starts here:
#include <...> search starts here:
 /opt/homebrew/opt/llvm/bin/../include/c++/v1
 /opt/homebrew/Cellar/llvm/13.0.1_1/lib/clang/13.0.1/include
 /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/usr/include
 /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk/System/Library/Frameworks (framework directory)
End of search list.
 "/usr/bin/ld" -demangle -lto_library /opt/homebrew/Cellar/llvm/13.0.1_1/lib/libLTO.dylib -dynamic -arch arm64 -platform_version macos 12.0.0 12.0.0 -syslibroot /Library/Developer/CommandLineTools/SDKs/MacOSX12.sdk -o a.out -L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib -L. /var/folders/kz/4wgytmy56ks64tslrymmc0vw0000gn/T/test-8916c0.o -lc++ -lSystem /opt/homebrew/Cellar/llvm/13.0.1_1/lib/clang/13.0.1/lib/darwin/libclang_rt.osx.a
```

</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzVWltz27YS_jXyC4YaXiRZevCDLFltpnaSiZ2mbxqIBCXU4KUAKFv59f0WJCXKlh03dc_M8TAUidtesPvtLphVkewueiO_vuJeeEmXP-_5014YyTxWVSJYL5rFxiay6EVXL3UquXqp18pMvNTHjRHaHnpPjKmsVNLunoyJi9xY8VhqBuK9aIqr5FJjvMxtL5wx-omuWNkbXka94Rwr4d_5JS4vjCKM8IIg6p3PaSw1TgJqCztNNCg6p3dc0eUxhwtc7KuR-ZpZkZWKW8HSQrO4yEoeg99-02tEyTW3cov-KocywDhLCmHyXnhuWbzh-Vowu5GGSWMq0a8ptIvWArGPe_FXRaFAJU-WqijKXjim7sdeOHHSuSHMcdJ0SSe5DwHc44xF9FjvtMS0dgpjMnVz3KCPeBzhcvqT0F8_ldpY6qPlHtvux7ZlP84IYu5oYca0sJXOmdWV2Gty35pyZQ7NpOyOpt-ghyrXhVIvaKKhgV7_uWD-GwXzjwTD8jO6OvLhDxSC5xSCN1II3kYhfE4hfCOF8HhzfqxuXtmCzWh4q_Llklur5aqyYrkEN7jyQuZK5oKWbLdcFTB7dyNikiyeq6WpMow3cp2LhKxe98IpS7jljbOyfL9AZ_scE5haG_H1dcd6XrPx_NjGuwbR2rmTLBw7DmrTrWWoqWEqVux2Rp0VBAyWHf-dnkU7wfzHNO1M32t8b5ykmv9wP7aFTNhK5PEm4_r-h5tQP2hRCm6f6K7ejRN74Tps4HpiVcT3jpvJG3erofXKlplWt08tKpq1Gmm2MpzVhnSM17X7OMwutdgKcFGUiEvyO6fFoB37IETOCFINW-1YzJUi9OY5E49WaNDbw3eHrbzUkCglxVdKlVYTfd_FjzDshcPS_dDbmHYB2gZvNZ-T0yZRazJ8RZN7klg73oj43ukB1jtUKmF1sG3eMtMbzvKahy6YmIanvac6cxmDrIdddNqHXQS-75PW2Oz60-y32-Xnqy_L26vZK-hR32mPMy7zhvOnLm00d54XBYNgOCGk6krnjNBJH4UNF-HgqaWRBt2YXDywrjUPL_NjX_0HIHFQ0JEDU9oQjhuea9UMWTgktqGtAG1PTe2ngalJhsLxIbxHs7Bj3HucomVbd2jj34tDXTB4EksY69KI3k7j5NAXrPktChm8ppCuqROrjDVS1u41OQLlA8Ihr2yF28-oJ3UxApkQDPzpKl2SR4p9E8njrfgJknueo38sZfRvpXwzyWNjeCvJypqa4DMSB4PZP7QlyXN0-VRZLMUA2tmO3QRAmhip4D2j5J88JyulAhysdlTQKE7YRsbFvG-AdNwB55rTr9YwS89VD_Om8gl95n2KmJfmhacIh4Wx_bgsiRHieHqSu2PL7EDyIBwO_TEpo4Xl0D8HJrtZx8b16qxgNG5nHdvHq7NG_jNSP54UjIftrNN7AER2UWz6gYoVFtQvUA1DboLSR0P30Bwp_7mpjOrQ2PR1wWfkYyPvhWk7uzEefdEfqBJQ7xGOBAeb-oBCqi6jjKAaSrBEpqnQMFhUW7rImCkqjee4SNrC6rQc4UGOU2J0sRhCXC16l7PeePZ-ktwd845i0G64fUlVG45qEqkJT5J6dVYWRroaE34t9JarPruElxDy7rswsxMrj5Lb_ca8kPyCJlgSf1USS1P2VOvmR0nvyO-zb5sd8emSKVYZoYQxz9lFwoW-I50901gvWnS38GNhRa2lGbl5gwiJMPcWPvIZecr466rKbRX6fX_AdJXnsh72AUQVm33-SgHnUIo7rXYKcWwKnjO53lhgILJUSsSdWZhSxDKVMaPcHDrO-mxalgpZ97lpuEFNW7yy9hNbxPW70IbOBvaGeNoLr8LeZN6bLpjDNuZ523pe3f1rkYmVRlJU9zZ9LIj6fr-xtjuu18KSoXOdjQYeJ8a9
hOsHmYdBf9BvKN1ttOAJy-A5ikbTlj3WXR9yYwGmIpnTgQs50gLZNO6bhvy-Qaktqr7FSuZP2a9yngnm8bp97sizjzzLJd96N4E3lbqP9BeWUTPFmiG_IRfH5jXaajqJiYWW7AZmHYyZTw2Dc9xgiJ_ndyz0EXKQZ-iiIMkf88ob-zCLwMf03vlVMASTX66ur6a3V8vpl5vRYHk3RmetotfhkK6R723JVAsXm17cweO9-xdxydseQtP_xb7XacXz0TOhFBWh7YSa4WXQzA0XTh4viDCfeXEcMA8FLxg_EgIpQGEegxBzoahvVZ6I1EOdl4LxaH43_fLL1d3y0-2y1S4au2PQnAg8xuTeXrH6M_ak4V5l-Fp0prwyRmTSUiPcMSOYePRwh_HWh31egqEr8JlqRKn9G4lM3gsgQTSg5pjrxAMYVsIj78BMKqO8FAmNa9hvOnNkGgKe2ypWAo483DyF-hZegzGpxiSvLBzIQoIcg5XgaW1IuoASgIggYjcYXeWwhMSzxBxIp-4k08qVO331_qq4Ik4TTybeigpTz-5K4blsgnDVS7fStKPrQwjjbWSSiBzGjIXAGfkzBIS01lmjF5cVa_Yw2DemgttKC4os23F_yE91pKUHC9iOT_XlAqZ_oj3W8enmXUk-9bwnKWypi-Q0_WCUZupUl-bmVDMdGZ0anWQnm-PyJLPf45PDv8ffT3JZKUWcnuozGx6eaufiwD5fSVYjRBmTScD5iwdvw1UK7a-rDJmA8VAZe_VBFpm6WFXrtdCerSjawuiUSlb7BWEV9-hs8ek8CAjLyNbQBFfy6vy9tuuEUAbl5eIrymLExUVehwd6yk3JY8TbBbkEA_060aM5J2HpFaChLxcN0Ow7GLx7Zyhc0GrXcqW53uFpTr5VlIJWQp6YQfZrGPpdUShi8Hb-G_3c8PjT7R_AI5PcOyR3H0fmuMct6DuXROLm6FiRvQ1Lw0W_j1vzcYS4br7YhIttcHrRf8M7XivjlEaOeyD8Rvb_oc5Prl8fvsXvLtGeEoXV4wj8LPKmHeRHqNGFa4Kd_6y1pi6kwBkQNlgwIZIceAqssSK2iP7Yy9RhLDkdCgIAvNMEglYLvpTJ1z5LYWUtKUH21qpYQWkJ1jDeg7QbD0w_SiK5zqt4nyui8uuHZOUUB2RWKhkjgCGOVA30Pz6CXizKJoClRy8Z4pvDfkSEde7qU-fCCjIlkq_zwgDtMXLrpJHfEevcuWqnwSgEsfly-ctstvx1-vvVcv5t-mURLmeLD8vp7Q0tyryCtnvrTCgtVFKr9f47boOH9c5mu-Ho3owG1ii9y7LY3z6gkvXX5Cd3jbK98SQYxX4fe_bIWu87zp2aRJqSi-O8ia24Qb5flWi5vv79pm1G5sArhRrIYdoP0iloCBKDAIIvtgLbhOoJtuI0sWvyov_CSWnl_xkHh-kLyjkeCn1v9gx0P-eGYZ8gLETBznW8YbB8bQ3boO49JMxHn39pfHT12vh3wM5XFvppFDus-Z6I9W6r3jpAfWHv6EQvbd8OxrI_Jb_KE1ak7aYoGFX_KNGvWa7VrhKXvCcCfK2RuVMFs1Q10Z_TOO7Xd5_6yQ4PWHgHpKXk1_HivBGJcFOYL1uXdjUCa2qEtlRAUCFG3iXMF4z3Uf8x7_qnlzp2Z5Ltuv-uEKj2-Ye6fc-4XTfX4Fe_uiFLbfsozPr8ZAV9llxEySSa8DMrrRIXX91RofsvE9IZCiq9reTKfZRja5EjQ4Q8dB5kkIaiZkLAXMNI6UinPgg-q7S62FhbGgIH971vjShYrfqI1AeBXOWFyX_Cqsmz6ESG9DocnJ_7Z5uLKBjGk2QVrNJoxNNQxMF4lKSD80mYinQ1OT9TfCWUuegNocyQvkG5Jcj0h_MzeUHHDf7AH_lh5A8QZ0dDPwq4CCeDlI9GcW_gwxek6hMf_UKvz_SFYwkphUEnuZM5dHLjTu2EI4f1eWU3hb5o0oszR_rCsf43uWDBKA">