[llvm-bugs] [Bug 42638] New: [ARM][Codegen] va_args lowering incorrect
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Jul 16 09:13:54 PDT 2019
https://bugs.llvm.org/show_bug.cgi?id=42638
Bug ID: 42638
Summary: [ARM][Codegen] va_args lowering incorrect
Product: new-bugs
Version: trunk
Hardware: All
OS: Linux
Status: NEW
Severity: normal
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: diogo.sampaio at arm.com
CC: diogo.sampaio at arm.com, htmldeveloper at gmail.com,
llvm-bugs at lists.llvm.org
For this C code llvm generates incorrect and inefficient ARM code:
===
#include <stdarg.h>
struct __attribute((packed)) S {
double M0;
};
double foo(int P0, ...) {
{
__attribute((aligned(8))) char V1[8];
__asm volatile("" : : "r"(&V1[0]));
}
va_list vl;
va_start(vl, P0);
struct S v = va_arg(vl, struct S);
return v.M0;
}
=======
Compiled with
clang --target=arm-arm-none-eabi -march=armv8-r -c mtest.c -S -O2
-mfloat-abi=softfp -mfpu=fpv5-sp-d16 -o -
will generate this assembly output:
foo:
.fnstart
.pad #12
sub sp, sp, #12
.pad #20
sub sp, sp, #20
str r1, [sp, #20]
add r1, sp, #20
add r0, sp, #8
str r2, [sp, #24]
str r3, [sp, #28]
@APP
@NO_APP
str r1, [sp, #8]
ldr r0, [sp, #20]
ldr r2, [sp, #24]
str r0, [sp]
ldm sp, {r0, r1} @ ----- Error here: Dependency break <<<
str r2, [sp, #4] @ ----- this should come before the ldm <<<
add sp, sp, #20
add sp, sp, #12
bx lr
=====
The incorrect dependence break occurs during machine-scheduler.
MIR prior to machine-scheduler:
=====
body: |
bb.0.entry:
liveins: $r1, $r2, $r3
%3:gpr = COPY $r3
%2:gpr = COPY $r2
%1:gpr = COPY $r1
STRi12 %1, %fixed-stack.0, 0, 14, $noreg :: (store 4 into %fixed-stack.0)
STRi12 %2, %fixed-stack.0, 4, 14, $noreg :: (store 4 into %fixed-stack.0 +
4)
STRi12 %3, %fixed-stack.0, 8, 14, $noreg :: (store 4 + 8)
%4:gpr = ADDri %stack.0.V1, 0, 14, $noreg, $noreg
INLINEASM &"", 1, 327689, %4
%5:gpr = ADDri %fixed-stack.0, 0, 14, $noreg, $noreg
STRi12 %5, %stack.1.vl, 0, 14, $noreg :: (store 4 into %ir.1)
%6:gpr = LDRi12 %fixed-stack.0, 0, 14, $noreg :: (load 4 from
%ir.argp.cur3)
%7:gpr = LDRi12 %fixed-stack.0, 4, 14, $noreg :: (load 4 from %ir.argp.cur3
+ 4)
STRi12 %7, %stack.2, 4, 14, $noreg :: (store 4 into %stack.2 + 4)
%8:gpr = LDRi12 %stack.2, 4, 14, $noreg :: (load 4 from %stack.2)
$r0 = COPY %6
$r1 = COPY %8
BX_RET 14, $noreg, implicit killed $r0, implicit $r1
=====
And after:
body: |
bb.0.entry:
liveins: $r1, $r2, $r3
%1:gpr = COPY $r1
%2:gpr = COPY $r2
%3:gpr = COPY $r3
STRi12 %1, %fixed-stack.0, 0, 14, $noreg :: (store 4 into %fixed-stack.0)
%4:gpr = ADDri %stack.0.V1, 0, 14, $noreg, $noreg
%5:gpr = ADDri %fixed-stack.0, 0, 14, $noreg, $noreg
STRi12 %2, %fixed-stack.0, 4, 14, $noreg :: (store 4 into %fixed-stack.0 +
4)
STRi12 %3, %fixed-stack.0, 8, 14, $noreg :: (store 4 + 8)
INLINEASM &"", 1, 327689, %4
STRi12 %5, %stack.1.vl, 0, 14, $noreg :: (store 4 into %ir.1)
%6:gpr = LDRi12 %fixed-stack.0, 0, 14, $noreg :: (load 4 from
%ir.argp.cur3)
%8:gpr = LDRi12 %stack.2, 4, 14, $noreg :: (load 4 from %stack.2)
%7:gpr = LDRi12 %fixed-stack.0, 4, 14, $noreg :: (load 4 from %ir.argp.cur3
+ 4)
$r0 = COPY %6
$r1 = COPY %8
STRi12 %7, %stack.2, 4, 14, $noreg :: (store 4 into %stack.2 + 4)
BX_RET 14, $noreg, implicit killed $r0, implicit $r1
=====
However, the error seems to appear before this pass. The instruction
%8:gpr = LDRi12 %stack.2, 4, 14, $noreg :: (load 4 from %stack.2)
is missing the "+ 4" offset in its memory operand, so the scheduler does not
detect the dependency and breaks it by moving the store to after this load.
Second, the instruction
STRi12 %3, %fixed-stack.0, 8, 14, $noreg :: (store 4 + 8)
is also incorrect: its memory operand is missing the pointer it is storing to.
It should read (store 4 into %fixed-stack.0 + 8).
So the invalid MIR is actually produced earlier, during the initial generation
of the MIR (instruction selection):
*** IR Dump After Module Verifier ***
define double @foo(i32 %P0, ...) #0 {
entry:
%V1 = alloca [8 x i8], align 8
%vl = alloca %struct.__va_list, align 4
%0 = bitcast [8 x i8]* %V1 to i8*
call void asm sideeffect "", "r"(i8* nonnull %0)
%1 = bitcast %struct.__va_list* %vl to i8*
call void @llvm.va_start(i8* nonnull %1)
%2 = bitcast %struct.__va_list* %vl to double**
%argp.cur3 = load double*, double** %2, align 4
%v.sroa.0.0.copyload = load double, double* %argp.cur3, align 4
ret double %v.sroa.0.0.copyload
}
# *** IR Dump Before Finalize ISel and expand pseudo-instructions ***:
# Machine code for function foo: IsSSA, TracksLiveness
Frame Objects:
fi#-1: size=12, align=4, fixed, at location [SP-12]
fi#0: size=8, align=8, at location [SP]
fi#1: size=4, align=4, at location [SP]
fi#2: size=8, align=8, at location [SP]
Function Live Ins: $r1 in %1, $r2 in %2, $r3 in %3
bb.0.entry:
liveins: $r1, $r2, $r3
%3:gpr = COPY $r3
%2:gpr = COPY $r2
%1:gpr = COPY $r1
STRi12 %1:gpr, %fixed-stack.0, 0, 14, $noreg :: (store 4 into %fixed-stack.0)
STRi12 %2:gpr, %fixed-stack.0, 4, 14, $noreg :: (store 4 into %fixed-stack.0
+ 4)
STRi12 %3:gpr, %fixed-stack.0, 8, 14, $noreg :: (store 4 + 8)
%4:gpr = ADDri %stack.0.V1, 0, 14, $noreg, $noreg
INLINEASM &"" [sideeffect] [attdialect], $0:[reguse:GPR], %4:gpr
%5:gpr = ADDri %fixed-stack.0, 0, 14, $noreg, $noreg
STRi12 killed %5:gpr, %stack.1.vl, 0, 14, $noreg :: (store 4 into %ir.1)
%6:gpr = LDRi12 %fixed-stack.0, 0, 14, $noreg :: (load 4 from %ir.argp.cur3)
%7:gpr = LDRi12 %fixed-stack.0, 4, 14, $noreg :: (load 4 from %ir.argp.cur3 +
4)
STRi12 killed %7:gpr, %stack.2, 4, 14, $noreg :: (store 4 into %stack.2 + 4)
%8:gpr = LDRi12 %stack.2, 4, 14, $noreg :: (load 4 from %stack.2)
$r0 = COPY %6:gpr
$r1 = COPY %8:gpr
BX_RET 14, $noreg, implicit $r0, implicit $r1
# End machine code for function foo.
====
Lastly, the generated code is highly inefficient, as gcc produces much smaller
(still not optimal) code. It seems llvm forces spilling the values to the stack
before the inline assembly.
https://godbolt.org/z/CjWSWb
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20190716/e3044911/attachment-0001.html>
More information about the llvm-bugs
mailing list