[llvm-bugs] [Bug 40206] New: After r329339, loop unrolling with long doubles seems to be incorrect

via llvm-bugs llvm-bugs at lists.llvm.org
Wed Jan 2 11:15:24 PST 2019


https://bugs.llvm.org/show_bug.cgi?id=40206

            Bug ID: 40206
           Summary: After r329339, loop unrolling with long doubles seems
                    to be incorrect
           Product: new-bugs
           Version: 7.0
          Hardware: PC
                OS: All
            Status: NEW
          Severity: normal
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: dimitry at andric.com
                CC: htmldeveloper at gmail.com, llvm-bugs at lists.llvm.org

As reported in https://bugs.freebsd.org/234040, after the import of clang 7.0 a
few math library tests started failing.

Bisection over llvm trunk showed that these failures were introduced via
https://reviews.llvm.org/rL329339 ("[X86] Remove some InstRWs for plain store
instructions on Sandy Bridge. We were forcing the latency of these instructions
to 5 cycles, but every other scheduler model had them as 1 cycle. I'm sure I
didn't get everything, but this gets a big portion.")

Test case, minimized from the loop here:
https://github.com/freebsd/freebsd/blob/master/lib/msun/ld80/e_rem_pio2l.h#L131

// clang -O2 rempio-min.c -o rempio-min

/*
 * Minimized reproducer (cut down from the loop in FreeBSD's
 * lib/msun/ld80/e_rem_pio2l.h).
 *
 * Performs two rounds of: take the integer part of z, subtract it from z,
 * and scale the remaining fraction by 2^24.  Returns the value of z left
 * after the second round.
 *
 * The noinline attribute keeps this as an out-of-line function; presumably
 * this is so the computation is not folded away when called with a constant
 * at -O2 (NOTE(review): intent not stated in the report).
 */
long double __attribute__((noinline)) rem_pio2l_min(long double z)
{
  int i;
  double tx[2];

  for (i = 0; i < 2; ++i) {
    /* (int) conversion discards the fractional part (truncation toward zero). */
    tx[i] = (double)((int)(z));
    /* Keep only the fraction and shift it up by 24 bits: 1.6777216e+07 == 2^24. */
    z = (z - tx[i]) * 1.6777216e+07;
  }

  return z;
}

/*
 * Runs rem_pio2l_min() on a single fixed input and compares the result
 * against the expected value with exact equality.
 *
 * Exit status: 0 if the result matches, 1 otherwise.
 */
int main(void)
{
  /*
   * test1 is 0xd979f7.4b73b00193 when written out as a hex fixed-point value.
   * Round 1 drops 0xd979f7 and scales the fraction to 0x4b73b0.0193;
   * round 2 drops 0x4b73b0 and scales the fraction 0x0.0193 by 2^24,
   * giving 0x1.93p+16, which is check1.
   */
  const long double test1 = 0x1.b2f3ee96e7600326p+23L;
  const long double check1 = 0x1.93p+16;
  long double res;

  res = rem_pio2l_min(test1);

  /* Exact comparison is intentional: every step above is exact when computed correctly. */
  return res == check1 ? 0 : 1;
}

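Side-by-side diff of clang r329338 (left) and r329339 (right) assembly output,
hoping that bugzilla won't mess it up too badly. Note that on the right the
fldt and the fmul have each been moved below an fldcw that loads the temporary
control word 3199 (0x0C7F), so the fmul now executes while that control word is
still in effect rather than under the original one: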

rem_pio2l_min:                            rem_pio2l_min:
.cfi_startproc                            .cfi_startproc
pushq   %rbp                              pushq   %rbp
.cfi_def_cfa_offset 16                    .cfi_def_cfa_offset 16
.cfi_offset %rbp, -16                     .cfi_offset %rbp, -16
movq    %rsp, %rbp                        movq    %rsp, %rbp
.cfi_def_cfa_register %rbp                .cfi_def_cfa_register %rbp
fnstcw  -4(%rbp)                          fnstcw  -4(%rbp)
fldt    16(%rbp)                <
movzwl  -4(%rbp), %eax                    movzwl  -4(%rbp), %eax
movw    $3199, -4(%rbp)                   movw    $3199, -4(%rbp)
fldcw   -4(%rbp)                          fldcw   -4(%rbp)
                                >         fldt    16(%rbp)
movw    %ax, -4(%rbp)                     movw    %ax, -4(%rbp)
fistl   -8(%rbp)                          fistl   -8(%rbp)
fldcw   -4(%rbp)                          fldcw   -4(%rbp)
cvtsi2sdl       -8(%rbp), %xmm0           cvtsi2sdl       -8(%rbp), %xmm0
movsd   %xmm0, -32(%rbp)                  movsd   %xmm0, -32(%rbp)
fsubl   -32(%rbp)                         fsubl   -32(%rbp)
flds    .LCPI0_0(%rip)          <
fnstcw  -2(%rbp)                          fnstcw  -2(%rbp)
fmul    %st(0), %st(1)          |         flds    .LCPI0_0(%rip)
movzwl  -2(%rbp), %eax                    movzwl  -2(%rbp), %eax
movw    $3199, -2(%rbp)                   movw    $3199, -2(%rbp)
fldcw   -2(%rbp)                          fldcw   -2(%rbp)
                                >         fmul    %st(0), %st(1)
movw    %ax, -2(%rbp)                     movw    %ax, -2(%rbp)
fxch    %st(1)                            fxch    %st(1)
fistl   -12(%rbp)                         fistl   -12(%rbp)
fldcw   -2(%rbp)                          fldcw   -2(%rbp)
xorps   %xmm0, %xmm0                      xorps   %xmm0, %xmm0
cvtsi2sdl       -12(%rbp), %xmm0          cvtsi2sdl       -12(%rbp), %xmm0
movsd   %xmm0, -24(%rbp)                  movsd   %xmm0, -24(%rbp)
fsubl   -24(%rbp)                         fsubl   -24(%rbp)
fmulp   %st(1)                            fmulp   %st(1)
popq    %rbp                              popq    %rbp
retq                                      retq

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20190102/ce98e7fa/attachment-0001.html>


More information about the llvm-bugs mailing list