<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Missed Optimization with for_each, transform and raw loop with small std::array"
   href="https://bugs.llvm.org/show_bug.cgi?id=42499">42499</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Missed Optimization with for_each, transform and raw loop with small std::array
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>clang
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>7.0
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>C++
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedclangbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>nathanoliver60097@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>blitzrakete@gmail.com, dgregor@apple.com, erik.pilkington@gmail.com, llvm-bugs@lists.llvm.org, richard-llvm@metafoo.co.uk
          </td>
        </tr></table>
      <p>
        <div>
        <pre>The compiler fails to optimize for_each, transform and a raw for loop into the
same code for small std::array's.  With the following code

#include <array>
#include <algorithm>


static void ForEach(benchmark::State& state) {
  std::array<bool, sizeof(short) * 8> a;
  std::fill(a.begin(), a.end(), true);

  for (auto _ : state) {
    std::for_each(a.begin(), a.end(), [](auto & arg) { arg = !arg; });
    benchmark::DoNotOptimize(a);
  }
}
BENCHMARK(ForEach);

static void Transform(benchmark::State& state) {
  std::array<bool, sizeof(short) * 8> a;
  std::fill(a.begin(), a.end(), true);

  for (auto _ : state) {
    std::transform(a.begin(), a.end(), a.begin(), [](auto arg) { return !arg;
});
    benchmark::DoNotOptimize(a);
  }
}
BENCHMARK(Transform);


static void RawLoop(benchmark::State& state) {
  std::array<bool, sizeof(short) * 8> a;
  std::fill(a.begin(), a.end(), true);

  for (auto _ : state) {
    for (int i = 0; i < a.size(); i++) {
      a[i] = !a[i];
    }
    benchmark::DoNotOptimize(a);
  }
}
BENCHMARK(RawLoop);

Running on quick-bench the raw loop is about 5 times faster
(<a href="http://quick-bench.com/F0C__3bEIG0maZh_vQddQNWQUUQ">http://quick-bench.com/F0C__3bEIG0maZh_vQddQNWQUUQ</a>).  If the array size is
increased then the compiler gets the same results for all 3 functions
(<a href="http://quick-bench.com/eC1fp41UYz7YK8WQOFfJokqfduw">http://quick-bench.com/eC1fp41UYz7YK8WQOFfJokqfduw</a>).  GCC on the other hand
will optimize the small array case to the same performance for all 3 functions
(<a href="http://quick-bench.com/UCmqDfqqWtlHOMscX01RTIHqVHU">http://quick-bench.com/UCmqDfqqWtlHOMscX01RTIHqVHU</a>)</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>