[llvm-bugs] [Bug 34132] New: LTO optimization causes clang to produce incorrect code

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Aug 8 19:39:24 PDT 2017


            Bug ID: 34132
           Summary: LTO optimization causes clang to produce incorrect code
           Product: clang
           Version: unspecified
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: -New Bugs
          Assignee: unassignedclangbugs at nondot.org
          Reporter: konstantin.belochapka at sony.com
                CC: llvm-bugs at lists.llvm.org


#include <stdio.h>

/* Anonymous struct holding two 4-byte arrays; the single file-scope instance
 * `s` is the object main() passes to init_arr() and then prints. */
struct {
  unsigned char a[4];  /* printed by the first printf() in main() */
  unsigned char b[4];  /* printed by the second printf() in main() */
} s;

/*
 * Fill the first `size` bytes of the object at `data` with 0, 1, ..., size-1.
 *
 * data: start of the buffer to write; must have room for `size` bytes.
 * size: number of bytes to write; 0 writes nothing.
 *
 * The cast drops the `volatile` qualifier, so the stores go through a plain
 * `unsigned char *`. It is kept unchanged because this snippet is the bug
 * reproducer.
 */
static void init_arr(volatile void *data, unsigned char size) {
  unsigned char *bytes = (unsigned char *)data;
  for (unsigned char i = 0; i != size; ++i) {
    bytes[i] = i;
  }
}

/*
 * Reproducer entry point: fills all sizeof(s) bytes of `s` through
 * init_arr(), then prints the four bytes of s.a and the four bytes of s.b
 * in hex. The correct listing in this report passes 0..3 to the first
 * printf() and 4..7 to the second.
 */
int main() {
  init_arr(&s, sizeof(s) );
  printf("a:%x %x %x %x\n", s.a[0], s.a[1], s.a[2], s.a[3]);
  printf("b:%x %x %x %x\n", s.b[0], s.b[1], s.b[2], s.b[3]);
  return 0;
}

Compiling with any of the following command lines produces incorrect code:

clang -Oz -flto  source.c
clang -O0 -flto  source.c
clang -O1 -flto  source.c

        xor    %eax,%eax
        lea    0x7fcb(%rip),%rcx        # 80a8 <__preinit_array_end>
        jmp    e5 <__start__Zdynsym+0x5>
        mov    %al,(%rax,%rcx,1)
        inc    %rax
        cmp    $0x8,%rax
        jne    df <__dynstr_end+0x6>
        push   %rbp
        mov    %rsp,%rbp
        movzbl 0x7fb2(%rip),%esi        # 80a8 <__preinit_array_end>
        movzbl 0x7fac(%rip),%edx        # 80a9 <__preinit_array_end+0x1>
        movzbl 0x7fa6(%rip),%ecx        # 80aa <__preinit_array_end+0x2>
        movzbl 0x7f9f(%rip),%r8d        # 80ab <__preinit_array_end+0x3>

        lea    0xdd(%rip),%rdi        # 1f0 <.L.str>
        xor    %eax,%eax
        callq  1a0 </PLprintf>
        lea    0xde(%rip),%rdi        # 1ff <.L.str.1>
        xor    %esi,%esi
        xor    %edx,%edx
        xor    %ecx,%ecx
        xor    %r8d,%r8d
        xor    %eax,%eax
        callq  1a0 </PLprintf>
        xor    %eax,%eax
        pop    %rbp

With LTO optimization clang assumes that s.b[0] == s.b[1] == s.b[2] == s.b[3]
== 0 and generates all zero values for the second printf().

With optimization level -O2 or -O3 and -flto, clang produces correct code:

        push   %rbp
        mov    %rsp,%rbp
        lea    0xf5(%rip),%rdi        # 1e0 <.L.str>
        mov    $0x0,%esi
        mov    $0x1,%edx
        mov    $0x2,%ecx
        mov    $0x3,%r8d
        xor    %eax,%eax
        callq  190 </PLprintf>
        lea    0xe1(%rip),%rdi        # 1ef <.L.str.1>
        mov    $0x4,%esi
        mov    $0x5,%edx
        mov    $0x6,%ecx
        mov    $0x7,%r8d
        xor    %eax,%eax
        callq  190 </PLprintf>
        xor    %eax,%eax
        pop    %rbp

