<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - [miscompilation] set to 0 on __m128d ignored: garbage in high element on compare and movemask"
   href="https://bugs.llvm.org/show_bug.cgi?id=34549">34549</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>[miscompilation] set to 0 on __m128d ignored: garbage in high element on compare and movemask
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>new-bugs
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>5.0
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>new bugs
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>kretz@kde.org
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>reduced testcase:

#include &lt;x86intrin.h&gt;
#include &lt;iostream&gt;

// Writes both lanes of a __m128d to the stream in the form "{v[0], v[1]}"
// and returns the result of the chained insertion.
std::ostream &operator<<(std::ostream &s, __m128d v)
{
    return s << '{' << v[0] << ", " << v[1] << '}';
}

// Reproducer entry point. Returns 1 after printing "!!!FAILED!!!" when the
// final compare-and-movemask does not report both lanes of x as equal to
// zero; returns 0 otherwise.
int main()
{
    // Three nested loops whose initializer lists contain only the value 1.,
    // so every iteration broadcasts 1. into both lo and hi.
    // NOTE(review): presumably this is the "unrelated code" that the report
    // says influences whether the miscompilation shows up -- confirm before
    // simplifying or removing it.
    for (double lo_ : {1., 1.}) {
        for (double hi_ : {1., 1.}) {
            for (std::size_t pos = 0; pos < 2; ++pos) {
                __m128d lo = _mm_set1_pd(lo_);
                __m128d hi = _mm_set1_pd(hi_);
                // A non-zero mask means at least one lane of hi compared
                // less than the matching lane of lo.
                if (0 != _mm_movemask_pd(hi < lo)) {
                    std::cerr << hi << ", lo: " << lo;
                    // A mask of 3 means both lanes satisfied the comparison;
                    // anything else triggers the second diagnostic print.
                    if (3 != _mm_movemask_pd(hi >= hi)) {
                        std::cerr << hi << ", lo: " << lo;
                    }
                }
            }
        }
    }

    // Start with both lanes set to 1., then overwrite each lane with 0.
    __m128d x = _mm_set1_pd(1.);
    for (std::size_t i = 0; i < 2; ++i) {
        // Inline asm that takes x as a memory input operand ("m").
        // NOTE(review): presumably used only to force x to live in memory at
        // this point; rax is not listed as a clobber -- confirm intent.
        asm("ror $64,%%rax" ::"m"(x));
        // Per the report, this store is the one that gets skipped for i = 1.
        x[i] = 0;  // #1
    }
    // Same instruction, but x is now a read-write memory operand ("+m").
    asm("ror $64,%%rax" :"+m"(x));
    // Both lanes are expected to equal zero here, i.e. the mask must be 3.
    if (3 != _mm_movemask_pd(x == _mm_setzero_pd())) {
        std::cerr << "!!!FAILED!!!\n";
        return 1;
    }
    return 0;
}

Compile with `clang++-5.0 -std=c++14 -O2 -msse2`. clang 4.0 does not fail.

The assignment at line #1 is skipped for i = 1. However, minimal changes to
unrelated code lead to the use of a `movsd` from memory, thus zeroing the upper
64 bits, as requested.

The above code is a reduced testcase from a unit test in
<a href="https://github.com/VcDevel/Vc">https://github.com/VcDevel/Vc</a>.</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>