[llvm-bugs] [Bug 24529] New: We miscompile jumping between MS asm blocks with optimizations

Thu Aug 20 17:38:49 PDT 2015

https://llvm.org/bugs/show_bug.cgi?id=24529

            Bug ID: 24529
           Summary: We miscompile jumping between MS asm blocks with
                    optimizations
           Product: clang
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: normal
          Priority: P
         Component: Frontend
          Assignee: unassignedclangbugs at nondot.org
          Reporter: ehsan at mozilla.com
                CC: david.majnemer at gmail.com, hans at chromium.org,
                    jmuizelaar at mozilla.com, llvm-bugs at lists.llvm.org,
                    rnk at google.com
    Classification: Unclassified

Apologies for the crazy test case.  Pay attention to the comments, and ignore
the computations (they're just designed to use many registers).

$ cat test.cpp
void g(int);
void f(int x) {
    int k = x + x * (x-1);
    g(k/x);
    int l = (x+1) * x * (x-1);
    int l2 = (x+10) * x * (x-15);
    int l3 = (x+100) * x * (x-150);
    int m = (l2+3) * x * (x-3);
    // at this point, ebx has been written to.
    __asm {
        mov esi, k
    L2:
    inc eax
    }
    // this expression reads ebx.
    int foo = m * l2 / (x -4) * (m+ k/l* l3) * x;
    g(foo);
    if (m == 479594508 && k == 10000) {
        __asm {
            // clobber ebx.
            mov ebx, m
            inc k
            jmp L2
        }
    }
    g(foo);
}
$ cat driver.cpp
#include <stdio.h>
void g(int x) {
  printf("%d\n", x);
}
int main() {
  void f(int);
  f(100);
}
$ ./bin/clang -fasm-blocks -m32 test.cpp driver.cpp -o test.out && ./test.out
100
454085520
454085520
454085520
$ ./bin/clang -O2 -fasm-blocks -m32 test.cpp driver.cpp -o test.out &&
./test.out
100
454085520
748924496
748924496

The problem here is that the jmp between the asm blocks invalidates the clobber
registers that we compute for each individual asm block.  Basically, the
compiler thinks that the first asm block doesn't clobber ebx so it will try to
reuse ebx afterwards, which doesn't work the second time that leave the first
block (after having jmp'ed to it from the second block.)

The generated code makes the problem obvious.  I'm including it below with
comments inline:

$ ./bin/clang -O2 -fasm-blocks -m32 -c test.cpp && ./bin/llvm-objdump
-disassemble -x86-asm-syntax=intel test.o

test.o:    file format Mach-O 32-bit i386

Disassembly of section __TEXT,__text:
__Z1fi:
       0:    55     push    ebp
       1:    89 e5     mov    ebp, esp
       3:    53     push    ebx
       4:    57     push    edi
       5:    56     push    esi
       6:    83 ec 1c     sub    esp, 28
       9:    8b 7d 08     mov    edi, dword ptr [ebp + 8]
       c:    8d 77 ff     lea    esi, [edi - 1]
       f:    89 f8     mov    eax, edi
      11:    0f af c0     imul    eax, eax
      14:    89 45 f0     mov    dword ptr [ebp - 16], eax
      17:    99     cdq
      18:    f7 ff     idiv    edi
      1a:    89 04 24     mov    dword ptr [esp], eax
      1d:    e8 de ff ff ff     call    -34 <__Z1fi>
      22:    8d 47 01     lea    eax, [edi + 1]
      25:    0f af f7     imul    esi, edi
      28:    0f af f0     imul    esi, eax
      2b:    89 75 e8     mov    dword ptr [ebp - 24], esi
      2e:    8d 47 0a     lea    eax, [edi + 10]
      31:    0f af c7     imul    eax, edi
      34:    8d 4f f1     lea    ecx, [edi - 15]
      37:    0f af c8     imul    ecx, eax
      3a:    8d 41 03     lea    eax, [ecx + 3]
      3d:    8d 5f fd     lea    ebx, [edi - 3]
      40:    0f af df     imul    ebx, edi

Write to ebx:

      43:    0f af d8     imul    ebx, eax
      46:    89 5d ec     mov    dword ptr [ebp - 20], ebx
      49:    8b 75 f0     mov    esi, dword ptr [ebp - 16]

JMP target:

      4c:    40     inc    eax

Read from ebx (oops!):

      4d:    0f af cb     imul    ecx, ebx
      50:    8d 77 fc     lea    esi, [edi - 4]
      53:    89 c8     mov    eax, ecx
      55:    99     cdq
      56:    f7 fe     idiv    esi
      58:    89 c1     mov    ecx, eax
      5a:    8b 45 f0     mov    eax, dword ptr [ebp - 16]
      5d:    99     cdq
      5e:    f7 7d e8     idiv    dword ptr [ebp - 24]
      61:    89 c6     mov    esi, eax
      63:    8d 87 6a ff ff ff     lea    eax, [edi - 150]
      69:    0f af c7     imul    eax, edi
      6c:    0f af f0     imul    esi, eax
      6f:    8d 47 64     lea    eax, [edi + 100]
      72:    0f af f0     imul    esi, eax
      75:    01 de     add    esi, ebx
      77:    0f af f7     imul    esi, edi
      7a:    0f af f1     imul    esi, ecx
      7d:    89 34 24     mov    dword ptr [esp], esi
      80:    e8 7b ff ff ff     call    -133 <__Z1fi>
      85:    81 fb 0c 08 96 1c     cmp    ebx, 479594508
      8b:    75 11     jne    17 <__Z1fi+9E>
      8d:    81 7d f0 10 27 00 00     cmp    dword ptr [ebp - 16], 10000
      94:    75 08     jne    8 <__Z1fi+9E>

Clobber ebx in the second block:

      96:    8b 5d ec     mov    ebx, dword ptr [ebp - 20]
      99:    ff 45 f0     inc    dword ptr [ebp - 16]
      9c:    eb ae     jmp    -82 <__Z1fi+4C>
      9e:    89 34 24     mov    dword ptr [esp], esi
      a1:    e8 5a ff ff ff     call    -166 <__Z1fi>
      a6:    83 c4 1c     add    esp, 28
      a9:    5e     pop    esi
      aa:    5f     pop    edi
      ab:    5b     pop    ebx
      ac:    5d     pop    ebp
      ad:    c3     ret

MSVC deals with this problem by turning off all optimizations on the function
as soon as it encounters a cross asm block jump.  This can be observed by
comparing the output of MSVC with the jmp instruction commented out vs with it:

============================
With the jmp instruction:
============================

$ cl -O2 -c test.cpp && llvm-objdump -disassemble -x86-asm-syntax=intel
test.obj
Microsoft (R) C/C++ Optimizing Compiler Version 18.00.40629 for x86
Copyright (C) Microsoft Corporation.  All rights reserved.

test.cpp

test.obj:       file format COFF-i386

Disassembly of section .text$mn:
?f@@YAXH at Z:
       0:       55      push    ebp
       1:       8b ec   mov     ebp, esp
       3:       83 ec 18        sub     esp, 24
       6:       53      push    ebx
       7:       56      push    esi
       8:       8b 45 08        mov     eax, dword ptr [ebp + 8]
       b:       83 e8 01        sub     eax, 1
       e:       0f af 45 08     imul    eax, dword ptr [ebp + 8]
      12:       03 45 08        add     eax, dword ptr [ebp + 8]
      15:       89 45 fc        mov     dword ptr [ebp - 4], eax
      18:       8b 45 fc        mov     eax, dword ptr [ebp - 4]
      1b:       99      cdq
      1c:       f7 7d 08        idiv    dword ptr [ebp + 8]
      1f:       50      push    eax
      20:       e8 00 00 00 00  call    0 <?f@@YAXH at Z+25>
      25:       83 c4 04        add     esp, 4
      28:       8b 4d 08        mov     ecx, dword ptr [ebp + 8]
      2b:       83 c1 01        add     ecx, 1
      2e:       0f af 4d 08     imul    ecx, dword ptr [ebp + 8]
      32:       8b 55 08        mov     edx, dword ptr [ebp + 8]
      35:       83 ea 01        sub     edx, 1
      38:       0f af ca        imul    ecx, edx
      3b:       89 4d ec        mov     dword ptr [ebp - 20], ecx
      3e:       8b 45 08        mov     eax, dword ptr [ebp + 8]
      41:       83 c0 0a        add     eax, 10
      44:       0f af 45 08     imul    eax, dword ptr [ebp + 8]
      48:       8b 4d 08        mov     ecx, dword ptr [ebp + 8]
      4b:       83 e9 0f        sub     ecx, 15
      4e:       0f af c1        imul    eax, ecx
      51:       89 45 f4        mov     dword ptr [ebp - 12], eax
      54:       8b 55 08        mov     edx, dword ptr [ebp + 8]
      57:       83 c2 64        add     edx, 100
      5a:       0f af 55 08     imul    edx, dword ptr [ebp + 8]
      5e:       8b 45 08        mov     eax, dword ptr [ebp + 8]
      61:       2d 96 00 00 00  sub     eax, 150
      66:       0f af d0        imul    edx, eax
      69:       89 55 e8        mov     dword ptr [ebp - 24], edx
      6c:       8b 4d f4        mov     ecx, dword ptr [ebp - 12]
      6f:       83 c1 03        add     ecx, 3
      72:       0f af 4d 08     imul    ecx, dword ptr [ebp + 8]
      76:       8b 55 08        mov     edx, dword ptr [ebp + 8]
      79:       83 ea 03        sub     edx, 3
      7c:       0f af ca        imul    ecx, edx
      7f:       89 4d f8        mov     dword ptr [ebp - 8], ecx
      82:       8b 75 fc        mov     esi, dword ptr [ebp - 4]
      85:       40      inc     eax
      86:       8b 45 f8        mov     eax, dword ptr [ebp - 8]
      89:       0f af 45 f4     imul    eax, dword ptr [ebp - 12]
      8d:       8b 4d 08        mov     ecx, dword ptr [ebp + 8]
      90:       83 e9 04        sub     ecx, 4
      93:       99      cdq
      94:       f7 f9   idiv    ecx
      96:       8b c8   mov     ecx, eax
      98:       8b 45 fc        mov     eax, dword ptr [ebp - 4]
      9b:       99      cdq
      9c:       f7 7d ec        idiv    dword ptr [ebp - 20]
      9f:       0f af 45 e8     imul    eax, dword ptr [ebp - 24]
      a3:       03 45 f8        add     eax, dword ptr [ebp - 8]
      a6:       0f af c8        imul    ecx, eax
      a9:       0f af 4d 08     imul    ecx, dword ptr [ebp + 8]
      ad:       89 4d f0        mov     dword ptr [ebp - 16], ecx
      b0:       8b 55 f0        mov     edx, dword ptr [ebp - 16]
      b3:       52      push    edx
      b4:       e8 00 00 00 00  call    0 <?f@@YAXH at Z+B9>
      b9:       83 c4 04        add     esp, 4
      bc:       81 7d f8 0c 08 96 1c    cmp     dword ptr [ebp - 8], 479594508
      c3:       75 11   jne     17 <?f@@YAXH at Z+D6>
      c5:       81 7d fc 10 27 00 00    cmp     dword ptr [ebp - 4], 10000
      cc:       75 08   jne     8 <?f@@YAXH at Z+D6>
      ce:       8b 5d f8        mov     ebx, dword ptr [ebp - 8]
      d1:       ff 45 fc        inc     dword ptr [ebp - 4]
      d4:       eb af   jmp     -81 <?f@@YAXH at Z+85>
      d6:       8b 45 f0        mov     eax, dword ptr [ebp - 16]
      d9:       50      push    eax
      da:       e8 00 00 00 00  call    0 <?f@@YAXH at Z+DF>
      df:       83 c4 04        add     esp, 4
      e2:       5e      pop     esi
      e3:       5b      pop     ebx
      e4:       8b e5   mov     esp, ebp
      e6:       5d      pop     ebp
      e7:       c3      ret

============================
Without the jmp instruction:
============================

$ cl -O2 -c test.cpp && llvm-objdump -disassemble -x86-asm-syntax=intel
test.obj
Microsoft (R) C/C++ Optimizing Compiler Version 18.00.40629 for x86
Copyright (C) Microsoft Corporation.  All rights reserved.

test.cpp

test.obj:       file format COFF-i386

Disassembly of section .text$mn:
?f@@YAXH at Z:
       0:       51      push    ecx
       1:       53      push    ebx
       2:       55      push    ebp
       3:       8b 6c 24 10     mov     ebp, dword ptr [esp + 16]
       7:       8b c5   mov     eax, ebp
       9:       0f af c5        imul    eax, ebp
       c:       56      push    esi
       d:       57      push    edi
       e:       89 44 24 18     mov     dword ptr [esp + 24], eax
      12:       99      cdq
      13:       f7 fd   idiv    ebp
      15:       50      push    eax
      16:       e8 00 00 00 00  call    0 <?f@@YAXH at Z+1B>
      1b:       8d 45 0a        lea     eax, [ebp + 10]
      1e:       83 c4 04        add     esp, 4
      21:       8d 7d f1        lea     edi, [ebp - 15]
      24:       0f af f8        imul    edi, eax
      27:       8d 45 fd        lea     eax, [ebp - 3]
      2a:       0f af fd        imul    edi, ebp
      2d:       8d 5f 03        lea     ebx, [edi + 3]
      30:       0f af d8        imul    ebx, eax
      33:       0f af dd        imul    ebx, ebp
      36:       89 5c 24 10     mov     dword ptr [esp + 16], ebx
      3a:       8b 74 24 18     mov     esi, dword ptr [esp + 24]
      3e:       40      inc     eax
      3f:       8d 45 01        lea     eax, [ebp + 1]
      42:       8d 4d ff        lea     ecx, [ebp - 1]
      45:       0f af c8        imul    ecx, eax
      48:       8b 44 24 18     mov     eax, dword ptr [esp + 24]
      4c:       99      cdq
      4d:       0f af cd        imul    ecx, ebp
      50:       f7 f9   idiv    ecx
      52:       8d 8d 6a ff ff ff       lea     ecx, [ebp - 150]
      58:       8b f0   mov     esi, eax
      5a:       8b c3   mov     eax, ebx
      5c:       0f af f1        imul    esi, ecx
      5f:       8d 4d 64        lea     ecx, [ebp + 100]
      62:       0f af c7        imul    eax, edi
      65:       0f af f1        imul    esi, ecx
      68:       8d 4d fc        lea     ecx, [ebp - 4]
      6b:       99      cdq
      6c:       f7 f9   idiv    ecx
      6e:       0f af f5        imul    esi, ebp
      71:       03 f3   add     esi, ebx
      73:       0f af f0        imul    esi, eax
      76:       0f af f5        imul    esi, ebp
      79:       56      push    esi
      7a:       e8 00 00 00 00  call    0 <?f@@YAXH at Z+7F>
      7f:       83 c4 04        add     esp, 4
      82:       81 fb 0c 08 96 1c       cmp     ebx, 479594508
      88:       75 12   jne     18 <?f@@YAXH at Z+9C>
      8a:       81 7c 24 18 10 27 00 00         cmp     dword ptr [esp + 24],
10000
      92:       75 08   jne     8 <?f@@YAXH at Z+9C>
      94:       8b 5c 24 10     mov     ebx, dword ptr [esp + 16]
      98:       ff 44 24 18     inc     dword ptr [esp + 24]
      9c:       56      push    esi
      9d:       e8 00 00 00 00  call    0 <?f@@YAXH at Z+A2>
      a2:       83 c4 04        add     esp, 4
      a5:       5f      pop     edi
      a6:       5e      pop     esi
      a7:       5d      pop     ebp
      a8:       5b      pop     ebx
      a9:       59      pop     ecx
      aa:       c3      ret

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20150821/58527afd/attachment-0001.html>