<!doctype html>
<html lang="en">
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - After r329339, loop unrolling with long doubles seems to be incorrect"
   href="https://bugs.llvm.org/show_bug.cgi?id=40206">40206</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>After r329339, loop unrolling with long doubles seems to be incorrect
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>new-bugs
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>7.0
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>new bugs
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>dimitry@andric.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>htmldeveloper@gmail.com, llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <div>
        <pre>As reported in <a href="https://bugs.freebsd.org/234040">https://bugs.freebsd.org/234040</a>, after the import of clang 7.0 a
few math library tests started failing.

Bisection over llvm trunk showed that these failures were introduced via
<a href="https://reviews.llvm.org/rL329339">https://reviews.llvm.org/rL329339</a> ("[X86] Remove some InstRWs for plain store
instructions on Sandy Bridge. We were forcing the latency of these instructions
to 5 cycles, but every other scheduler model had them as 1 cycle. I'm sure I
didn't get everything, but this gets a big portion.")

Test case, minimized from the loop here:
<a href="https://github.com/freebsd/freebsd/blob/master/lib/msun/ld80/e_rem_pio2l.h#L131">https://github.com/freebsd/freebsd/blob/master/lib/msun/ld80/e_rem_pio2l.h#L131</a>

// clang -O2 rempio-min.c -o rempio-min

long double __attribute__((noinline)) rem_pio2l_min(long double z)
{
  int i;
  double tx[2];

  for (i = 0; i &lt; 2; ++i) {
    tx[i] = (double)((int)(z));
    z = (z - tx[i]) * 1.6777216e+07;
  }

  return z;
}

int main(void)
{
  const long double test1 = 0x1.b2f3ee96e7600326p+23L;
  const long double check1 = 0x1.93p+16;
  long double res;

  res = rem_pio2l_min(test1);

  return res == check1 ? 0 : 1;
}

Side-by-side diff of clang r329338 (left) and r329339 (right) assembly output,
hoping that bugzilla won't mess it up too badly:

rem_pio2l_min:                            rem_pio2l_min:
.cfi_startproc                            .cfi_startproc
pushq   %rbp                              pushq   %rbp
.cfi_def_cfa_offset 16                    .cfi_def_cfa_offset 16
.cfi_offset %rbp, -16                     .cfi_offset %rbp, -16
movq    %rsp, %rbp                        movq    %rsp, %rbp
.cfi_def_cfa_register %rbp                .cfi_def_cfa_register %rbp
fnstcw  -4(%rbp)                          fnstcw  -4(%rbp)
fldt    16(%rbp)                &lt;
movzwl  -4(%rbp), %eax                    movzwl  -4(%rbp), %eax
movw    $3199, -4(%rbp)                   movw    $3199, -4(%rbp)
fldcw   -4(%rbp)                          fldcw   -4(%rbp)
                                &gt;         fldt    16(%rbp)
movw    %ax, -4(%rbp)                     movw    %ax, -4(%rbp)
fistl   -8(%rbp)                          fistl   -8(%rbp)
fldcw   -4(%rbp)                          fldcw   -4(%rbp)
cvtsi2sdl       -8(%rbp), %xmm0           cvtsi2sdl       -8(%rbp), %xmm0
movsd   %xmm0, -32(%rbp)                  movsd   %xmm0, -32(%rbp)
fsubl   -32(%rbp)                         fsubl   -32(%rbp)
flds    .LCPI0_0(%rip)          &lt;
fnstcw  -2(%rbp)                          fnstcw  -2(%rbp)
fmul    %st(0), %st(1)          |         flds    .LCPI0_0(%rip)
movzwl  -2(%rbp), %eax                    movzwl  -2(%rbp), %eax
movw    $3199, -2(%rbp)                   movw    $3199, -2(%rbp)
fldcw   -2(%rbp)                          fldcw   -2(%rbp)
                                &gt;         fmul    %st(0), %st(1)
movw    %ax, -2(%rbp)                     movw    %ax, -2(%rbp)
fxch    %st(1)                            fxch    %st(1)
fistl   -12(%rbp)                         fistl   -12(%rbp)
fldcw   -2(%rbp)                          fldcw   -2(%rbp)
xorps   %xmm0, %xmm0                      xorps   %xmm0, %xmm0
cvtsi2sdl       -12(%rbp), %xmm0          cvtsi2sdl       -12(%rbp), %xmm0
movsd   %xmm0, -24(%rbp)                  movsd   %xmm0, -24(%rbp)
fsubl   -24(%rbp)                         fsubl   -24(%rbp)
fmulp   %st(1)                            fmulp   %st(1)
popq    %rbp                              popq    %rbp
retq                                      retq</pre>
      </div>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>