[llvm-dev] Incorrect placement of an instruction after PostRAScheduler pass

Bharathi Seshadri via llvm-dev llvm-dev at lists.llvm.org
Mon Dec 3 22:22:52 PST 2018


Hi,

I’m facing a crash issue (--target=arm-linux-gnueabi
-march=armv8-a+crc -mfloat-abi=hard) and debugging the problem, I
found that an intended branch was not taken due to bad code generation
after the Post RA Scheduler pass. A CMPri instruction after an
INLINEASM block (which inturn contains a cmp, bne instruction) is
being moved before the INLINEASM block incorrectly resulting in two
consecutive bne instructions.

I do not have a small convenient test case and there are several
inline functions involved, but I hope to explain the problem with
relevant code snippets. Any suggestions on what/where to look for the
problem within llvm would be very useful.

In the generated code, there are two consecutive bne instructions
which appears to be the problem; the cmp instruction at offset 198 is
actually associated with the bne at offset 1bc. Instructions from 1ac
to 1b8 are coming from an inline asm. After the “Post RA top-down list
latency scheduler” pass, the cmp instruction has been moved before the
inline asm causing the issue.

198:   e35c0001        cmp     ip, #1  <<<<<<<<<<<<<<<<<<
19c:   e023700e        eor     r7, r3, lr
1a0:   e0022003        and     r2, r2, r3
1a4:   e0087007        and     r7, r8, r7
1a8:   e1820007        orr     r0, r2, r7
1ac:   e1b42e9f        ldaexd  r2, r3, [r4]
1b0:   e1a46f90        strexd  r6, r0, [r4]
1b4:   e3560000        cmp     r6, #0
1b8:   1afffffb        bne     1ac <xxxxx+0x1ac>
1bc:   1a000002        bne     1cc <xxxx+0x1cc> <<<<<<<<<<<<<<<<<<<
1c0:   e1a00005        mov     r0, r5
1c4:   e3a01001        mov     r1, #1

This is the relevant C code:

       if (__builtin_expect((lock_flag == LOCK),1)) { // First
comparison using lock_flag, which is a function argument and an enum
with values 0, 1, 2.
           lock=foo_lock(vaddr);
        }
        old = func(flags, vaddr, data);
        if (__builtin_expect((lock_flag == LOCK),1)) { // Second
comparison using lock_flag
            foo_unlock(lock);
        }

The inline asm comes from the below function that is called within func().
static inline void write64 (volatile void *p, uint64_t val)
{
    uint64_t tmp=0,prev=0;
    asm volatile(
        "3: ldaexd %[prev], %H[prev], [%[p]];"
        "   strexd %[tmp], %[val], %H[val], [%[p]];"
        "   cmp %[tmp],#0;"
        "   bne 3b;"
        : [prev] "=&r" (prev), [tmp] "=&r" (tmp)
        : [p] "r" (p), [val] "r" (val) : "memory");
}

------------------------
# *** IR Dump Before Post RA top-down list latency scheduler ***:

BB#6: derived from LLVM BB %if.else
    Live Ins: %LR %R2 %R3 %R4 %R7 %R12
    Predecessors according to CFG: BB#0
        CMPri %R12, 1, pred:14, pred:%noreg, %CPSR<imp-def>
<<<<<<<<<<< First comparison using lock_flag ; R12 holds lock_flag
        %R5<def> = IMPLICIT_DEF
        Bcc <BB#21>, pred:1, pred:%CPSR<kill> <<<<<<<<<<<
    Successors according to CFG: BB#7(0x40000000 / 0x80000000 =
50.00%) BB#21(0x40000000 / 0x80000000 = 50.00%)
……
BB#21: derived from LLVM BB %if.end
    Live Ins: %LR %R2 %R3 %R4 %R5 %R7 %R12
    Predecessors according to CFG: BB#6 BB#20 BB#9 BB#14
        INLINEASM <es:ldaexd $0, ${0:H}, [$1];> [sideeffect] [mayload]
[maystore] [attdialect], $0:[regdef-ec:GPRPair],
%R8_R9<earlyclobber,def>, $1:[reguse:GPR], %R4, <!3>
        %R0<def> = ANDri %R7, 1, pred:14, pred:%noreg, opt:%CPSR<def>
        %R0<def> = MOVr %LR, pred:1, pred:%CPSR<kill>, opt:%noreg
        %R1<def> = EORrr %R0, %LR, pred:14, pred:%noreg, opt:%noreg
        %R1<def> = ANDrr %R9, %R1<kill>, pred:14, pred:%noreg, opt:%noreg
        %R2<def> = ADDrr %R8, %R2<kill>, pred:14, pred:%noreg, opt:%CPSR<def>
        %R3<def> = ADCrr %R9, %R3<kill>, pred:14, pred:%noreg,
opt:%noreg, %CPSR<imp-use,kill>
        %R0<def> = ANDrr %R3<kill>, %R0<kill>, pred:14, pred:%noreg, opt:%noreg
        %R1<def> = ORRrr %R0<kill>, %R1<kill>, pred:14, pred:%noreg,
opt:%noreg, %R0_R1<imp-def>
        %R3<def> = ANDri %R7<kill>, 2, pred:14, pred:%noreg, opt:%CPSR<def>
        %R3<def> = MVNi 0, pred:1, pred:%CPSR<kill>, opt:%noreg
        %R7<def> = EORrr %R3, %LR<kill>, pred:14, pred:%noreg, opt:%noreg
        %R7<def> = ANDrr %R8, %R7<kill>, pred:14, pred:%noreg, opt:%noreg
        %R2<def> = ANDrr %R2<kill>, %R3<kill>, pred:14, pred:%noreg, opt:%noreg
        %R0<def> = ORRrr %R2<kill>, %R7<kill>, pred:14, pred:%noreg,
opt:%noreg, %R0_R1<imp-use,kill>, %R0_R1<imp-def>
<<<<<<<<<<<<<<<<<<<<<
        INLINEASM <es:3: ldaexd $0, ${0:H}, [$2];   strexd $1, $3,
${3:H}, [$2];   cmp $1,#0;   bne 3b;> [sideeffect] [mayload]
[maystore] [attdialect], $0:[regdef-ec:GPRPair],
%R2_R3<earlyclobber,def,dead>, $1:[regdef-ec:GPRPair],
%R6_R7<earlyclobber,def,dead>, $2:[reguse:GPR], %R4<kill>,
$3:[reguse:GPRPair], %R0_R1<kill>, <!11>

        CMPri %R12<kill>, 1, pred:14, pred:%noreg, %CPSR<imp-def> //
Second comparison with lock_flag; R12 holds lock_flag
        Bcc <BB#23>, pred:1, pred:%CPSR<kill>
<<<<<<<<<<<<<<<<<<<<<<<
    Successors according to CFG: BB#22(0x40000000 / 0x80000000 =
50.00%) BB#23(0x40000000 / 0x80000000 = 50.00%)
-------------------------------
# *** IR Dump After Post RA top-down list latency scheduler ***:
BB#21: derived from LLVM BB %if.end
    Live Ins: %LR %R2 %R3 %R4 %R5 %R7 %R12
    Predecessors according to CFG: BB#6 BB#20 BB#9 BB#14
        INLINEASM <es:ldaexd $0, ${0:H}, [$1];> [sideeffect] [mayload]
[maystore] [attdialect], $0:[regdef-ec:GPRPair],
%R8_R9<earlyclobber,def>, $1:[reguse:GPR], %R4, <!3>
        %R0<def> = ANDri %R7, 1, pred:14, pred:%noreg, opt:%CPSR<def>
        %R0<def> = MOVr %LR, pred:1, pred:%CPSR<kill>, opt:%noreg
        %R1<def> = EORrr %R0, %LR, pred:14, pred:%noreg, opt:%noreg
        %R2<def> = ADDrr %R8, %R2<kill>, pred:14, pred:%noreg, opt:%CPSR<def>
        %R1<def> = ANDrr %R9, %R1<kill>, pred:14, pred:%noreg, opt:%noreg
        %R3<def> = ADCrr %R9, %R3<kill>, pred:14, pred:%noreg,
opt:%noreg, %CPSR<imp-use,kill>
        %R0<def> = ANDrr %R3<kill>, %R0<kill>, pred:14, pred:%noreg, opt:%noreg
        %R3<def> = ANDri %R7<kill>, 2, pred:14, pred:%noreg, opt:%CPSR<def>
        %R3<def> = MVNi 0, pred:1, pred:%CPSR<kill>, opt:%noreg
        %R1<def> = ORRrr %R0<kill>, %R1<kill>, pred:14, pred:%noreg,
opt:%noreg, %R0_R1<imp-def>
        CMPri %R12<kill>, 1, pred:14, pred:%noreg, %CPSR<imp-def>
<<<<<<<<<<<<<<<<<
        %R7<def> = EORrr %R3, %LR<kill>, pred:14, pred:%noreg, opt:%noreg
        %R2<def> = ANDrr %R2<kill>, %R3<kill>, pred:14, pred:%noreg, opt:%noreg
        %R7<def> = ANDrr %R8, %R7<kill>, pred:14, pred:%noreg, opt:%noreg
        %R0<def> = ORRrr %R2<kill>, %R7<kill>, pred:14, pred:%noreg,
opt:%noreg, %R0_R1<imp-use,kill>, %R0_R1<imp-def>
        INLINEASM <es:3: ldaexd $0, ${0:H}, [$2];   strexd $1, $3,
${3:H}, [$2];   cmp $1,#0;   bne 3b;> [sideeffect] [mayload]
[maystore] [attdialect], $0:[regdef-ec:GPRPair],
%R2_R3<earlyclobber,def,dead>, $1:[regdef-ec:GPRPair],
%R6_R7<earlyclobber,def,dead>, $2:[reguse:GPR], %R4<kill>,
$3:[reguse:GPRPair], %R0_R1<kill>, <!11>
        Bcc <BB#23>, pred:1, pred:%CPSR<kill>    <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
    Successors according to CFG: BB#22(0x40000000 / 0x80000000 =
50.00%) BB#23(0x40000000 / 0x80000000 = 50.00%)
--------------
After the Post RA Scheduling pass, note that CMPri %R12 has been moved
up the INLINE ASM. Since the inline asm has a {cmp, bne} pair, it is
incorrect to move the CMPri before the inline assembly block.
How do I prevent CMPri from being overridden by the cmp within the
inline asm block. Could it be that some dependency is incorrectly
specified ?

Thanks,
Bharathi


More information about the llvm-dev mailing list