[LLVMbugs] [Bug 19326] New: PowerPC: invalid code generation at -O2 due to bad ordering of instructions

bugzilla-daemon at llvm.org bugzilla-daemon at llvm.org
Thu Apr 3 08:42:36 PDT 2014


http://llvm.org/bugs/show_bug.cgi?id=19326

            Bug ID: 19326
           Summary: PowerPC: invalid code generation at -O2 due to bad
                    ordering of instructions
           Product: clang
           Version: 3.4
          Hardware: Other
                OS: Linux
            Status: NEW
          Severity: release blocker
          Priority: P
         Component: LLVM Codegen
          Assignee: unassignedclangbugs at nondot.org
          Reporter: andyg1001 at hotmail.co.uk
                CC: llvmbugs at cs.uiuc.edu
    Classification: Unclassified

The problem I am observing is in the following snippet, where with optimisation
level 2 switched on, the code inside the if block executes even if maxLength is
0:

GuiTextField::GuiTextField(int maxLength)
    : base(), valid(true) {
  asm volatile(".marker1":::"memory");
  if (maxLength > 0x1234) // testing; was > 0)
    {
    asm volatile(".marker2":::"memory");
    printf("hmm %i %i\n", maxLength, maxLength > 0);
    this->SetMaxLength(maxLength);
    }
  asm volatile(".marker3":::"memory");
}

Here is example output from code compiled at -O2:

hmm 0 1


I am afraid that I have been unable to reduce a test-case out of this code
(having spent maybe 6 hours on it today!), but hopefully the outputs below can
help highlight the problem.

Here is the assembly between marker1 and marker3 when compiling with -O0 for
PowerPC (targeting 603e):

    #APP
    .marker1
    #NO_APP
    lwz 3, 68(31)
    cmpwi 0, 3, 4661
    blt 0, .LBB0_9
    b .LBB0_2
.LBB0_2:
    #APP
    .marker2
    #NO_APP
    lwz 3, 68(31)
    lis 4, .L.str at ha
.Ltmp3:
    li 5, 0
    li 6, 1
    cmpwi 0, 3, 0
    la 4, .L.str at l(4)
    stw 3, 32(31)
    stw 6, 28(31)
    stw 5, 24(31)
    stw 4, 20(31)
    bgt 0, .LBB0_4
    lwz 3, 24(31)
    stw 3, 28(31)
.LBB0_4:
    lwz 3, 28(31)
    lwz 4, 20(31)
    stw 3, 16(31)
    mr 3, 4
    lwz 4, 32(31)
    lwz 5, 16(31)
    crxor 6, 6, 6
    bl printf
.Ltmp4:
    stw 3, 12(31)
    b .LBB0_5
.LBB0_5:
    lwz 4, 68(31)
.Ltmp5:
    lwz 3, 48(31)
    bl _ZN17GuiEntryFieldBase12SetMaxLengthEi
.Ltmp6:
    b .LBB0_6
.LBB0_6:
    b .LBB0_9
.LBB0_7:
.Ltmp2:
    stw 3, 64(31)
    stw 4, 60(31)
    b .LBB0_10
.LBB0_8:
.Ltmp7:
    stw 3, 64(31)
    stw 4, 60(31)
    lwz 3, 44(31)
    bl _ZN7QStringD2Ev
    b .LBB0_10
.LBB0_9:
    #APP
    .marker3
    #NO_APP


And here is the same but at -O1 (note that the cmpwi has moved relative to
marker1 even though the marker1 declaration should prevent this, but the code
still runs correctly in this case):

    cmpwi 0, 29, 4661
    stb 3, 56(30)
    #APP
    .marker1
    #NO_APP
    blt 0, .LBB0_2
    lis 3, .L.str at ha
    mr 4, 29
    li 5, 1
    crxor 6, 6, 6
    #APP
    .marker2
    #NO_APP
    la 3, .L.str at l(3)
    bl printf
.Ltmp0:
    mr 3, 30
    mr 4, 29
    bl _ZN17GuiEntryFieldBase12SetMaxLengthEi
.Ltmp1:
.LBB0_2:
    lwz 30, 24(31)
    lwz 29, 20(31)
    lwz 28, 16(31)
    #APP
    .marker3
    #NO_APP


And then at -O2 (by which point the cmpwi is miles away from its corresponding
blt and the code inside the if block executes even though the condition is not
met -- I expect the comparison result is corrupted by the lwarx/stwcx. sequence
that is part of Qt's atomic operations):

    cmpwi 0, 29, 4661
    la 3, _ZTV17GuiTextInputField at l(3)
    la 12, _ZN7QString11shared_nullE at l(5)
    addi 4, 3, 8
    addi 3, 3, 368
    stw 4, 0(30)
    stw 3, 8(30)
    stw 12, 52(30)
    li 3, 1
    #APP
    lwarx  5,0, 12
addi   6, 5, 1
stwcx. 6,0, 12
bne-   $-12

    #NO_APP
    stb 3, 56(30)
    #APP
    .marker1
    #NO_APP
    blt 0, .LBB0_4
    lis 3, .L.str at ha
    mr 4, 29
    li 5, 1
    crxor 6, 6, 6
    #APP
    .marker2
    #NO_APP
    la 3, .L.str at l(3)
    bl printf
    lwz 3, 40(30)
    lwz 4, 0(3)
    lwz 4, 12(4)
.Ltmp0:
    mtctr 4
    bctrl
    mr 28, 3
.Ltmp1:
    lwz 5, 28(28)
    lis 3, .L.str1 at ha
    mr 4, 28
    mr 6, 29
    crxor 6, 6, 6
    la 3, .L.str1 at l(3)
    bl printf
    stw 29, 28(28)
    lwz 3, 40(30)
    lwz 4, 0(3)
    lwz 4, 12(4)
.Ltmp2:
    mtctr 4
    bctrl
    mr 4, 3
.Ltmp3:
    lwz 5, 28(4)
    lis 3, .L.str2 at ha
    crxor 6, 6, 6
    la 3, .L.str2 at l(3)
    bl printf
.LBB0_4:
    lwz 30, 24(31)
    lwz 29, 20(31)
    lwz 28, 16(31)
    #APP
    .marker3
    #NO_APP


Compare the code from gcc 4.6 for ppc603e at -O2:

#APP
    .marker1
#NO_APP
    cmpwi 7,30,4660
    bgt- 7,.L7
#APP
    .marker3
#NO_APP
    lwz 0,36(1)
    lwz 29,20(1)
    mtlr 0
    lwz 30,24(1)
    lwz 31,28(1)
    addi 1,1,32
    .cfi_remember_state
    .cfi_def_cfa_offset 0
    .cfi_restore 31
    .cfi_restore 30
    .cfi_restore 29
    blr
.L7:
    .cfi_restore_state
#APP
    .marker2
#NO_APP
    lis 4,.LC0 at ha
    li 3,1
    la 4,.LC0 at l(4)
    mr 5,30
    li 6,1
.LEHB1:
    crxor 6,6,6
    bl __printf_chk
    ...


And here is the same code for x86_64 compiled by clang (note that the code
works correctly here):

    #APP
    .marker1
    #NO_APP
    cmpl    $4661, %ebp             # imm = 0x1235
    jl    .LBB0_4
# BB#1:                                 # %invoke.cont5
    #APP
    .marker2
    #NO_APP
    movl    $.L.str, %edi
    movl    $1, %edx
    xorl    %eax, %eax
    movl    %ebp, %esi
    callq    printf
    movq    80(%r14), %rdi
    movq    (%rdi), %rax
    movq    24(%rax), %rax
.Ltmp0:
    callq    *%rax
    movq    %rax, %rbx
.Ltmp1:
# BB#2:                                 # %call.i.i.noexc
    movl    52(%rbx), %edx
    movl    $.L.str1, %edi
    xorl    %eax, %eax
    movq    %rbx, %rsi
    movl    %ebp, %ecx
    callq    printf
    movl    %ebp, 52(%rbx)
    movq    80(%r14), %rdi
    movq    (%rdi), %rax
    movq    24(%rax), %rax
.Ltmp2:
    callq    *%rax
    movq    %rax, %rcx
.Ltmp3:
# BB#3:                                 # %_ZN17GuiEntryFieldBase12SetMaxLengthEi.exit
    movl    52(%rcx), %edx
    movl    $.L.str2, %edi
    xorl    %eax, %eax
    movq    %rcx, %rsi
    callq    printf
.LBB0_4:                                # %if.end
    #APP
    .marker3
    #NO_APP


In case it is useful, here is the code for the Qt atomic function called in
this example:

inline bool QBasicAtomicInt::ref()
{
    register int originalValue;
    register int newValue;
    asm volatile("lwarx  %[originalValue]," _Q_VALUE "\n"
                 "addi   %[newValue], %[originalValue], %[one]\n"
                 "stwcx. %[newValue]," _Q_VALUE "\n"
                 "bne-   $-12\n"
                 : [originalValue] "=&b" (originalValue),
                   [newValue] "=&r" (newValue),
                   _Q_VALUE_MEMORY_OPERAND
                 : _Q_VALUE_REGISTER_OPERAND
                   [one] "i" (1)
                 : "cc", "memory");
    return newValue != 0;
}


If I were to hazard a guess, I would say that the PowerPC back-end is not
taking due care of the asm constraints.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20140403/331e7f52/attachment.html>


More information about the llvm-bugs mailing list