[llvm-bugs] [Bug 26218] New: BPF target: always inline __builtin_memcmp and friends
via llvm-bugs
llvm-bugs at lists.llvm.org
Wed Jan 20 02:54:58 PST 2016
https://llvm.org/bugs/show_bug.cgi?id=26218
Bug ID: 26218
Summary: BPF target: always inline __builtin_memcmp and friends
Product: new-bugs
Version: unspecified
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: daniel at iogearbox.net
CC: llvm-bugs at lists.llvm.org
Classification: Unclassified
There seem to be different heuristics with regard to when __builtin_memcmp()
gets inlined for specific targets. Below is a comparison of similar code for
BPF / x86 with disassembly attached.
In BPF, bar() gets inlined/optimized, while the foo() and rab() functions emit
an external call to memcmp(). In the x86 target, foo() and main() get inlined,
while rab() doesn't.
Obviously, when __builtin_memcmp() doesn't get inlined for the BPF target, the
program cannot be loaded into the kernel. The external call results in call 0,
which is not permitted and is thus rejected by the verifier.
It would be useful for the BPF target to always inline __builtin_memcmp()
invocations that have a compile-time known size (and perhaps, if that's
possible, throw an error when the size cannot be determined at compilation
time), so that eBPF insns are always emitted for the builtin function.
That way, applications don't have to reimplement this over and over, and could
just use the provided compiler builtins. It might also be useful to do the
same for other builtins like __builtin_memset / __builtin_memcpy / etc. for
the BPF target.
**** BPF target:
#define __section(N) __attribute__((section(N), used))
__section("p1")
int foo(void)
{
struct bar {
unsigned int a;
unsigned int b;
} x = { .a = 1, .b = 2 }, y = { .a = 1, .b = 2 };
return __builtin_memcmp(&x, &y, sizeof(x)) == 0;
}
__section("p2")
int bar(void)
{
unsigned long a = ~0, b = 123456789;
return __builtin_memcmp(&a, &b, sizeof(a)) == 0;
}
__section("p3")
int rab(void)
{
char a[16] = {}, b[16] = {};
a[15] = 1; b[15] = 4;
return __builtin_memcmp(a, b, sizeof(a)) == 0;
}
# clang -O2 -Wall -target bpf test-bpf.c -S -o -
.text
.section p1,"ax", at progbits
.globl foo
.align 8
foo: # @foo
# BB#0:
ld_64 r1, <MCOperand Expr:(.Lfoo.y)>
ld_64 r2, <MCOperand Expr:(.Lfoo.y)>
mov r3, 8
call memcmp
mov r1, r0
slli r1, 32
srli r1, 32
mov r0, 1
mov r2, 0
jeq r1, r2 goto LBB0_2
# BB#1:
mov r0, 0
LBB0_2:
ret
.section p2,"ax", at progbits
.globl bar
.align 8
bar: # @bar
# BB#0:
mov r0, 0
ret
.section p3,"ax", at progbits
.globl rab
.align 8
rab: # @rab
# BB#0:
mov r7, 0
stb -2(r10), r7
sth -4(r10), r7
stw -8(r10), r7
std -16(r10), r7
stb -18(r10), r7
sth -20(r10), r7
stw -24(r10), r7
std -32(r10), r7
mov r6, 1
stb -1(r10), r6
mov r1, 4
stb -17(r10), r1
mov r1, r10
addi r1, -16
mov r2, r10
addi r2, -32
mov r3, 16
call memcmp
slli r0, 32
srli r0, 32
jeq r0, r7 goto LBB2_2
# BB#1:
mov r6, 0
LBB2_2:
mov r0, r6
ret
.section .rodata.cst8,"aM", at progbits,8
.align 4 # @foo.y
.Lfoo.y:
.long 1 # 0x1
.long 2 # 0x2
**** X86 target for comparison:
#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
int foo(void)
{
struct bar {
unsigned int a;
unsigned int b;
} x = { .a = 1, .b = 2 }, y = { .a = 1, .b = 2 };
barrier_data(&x);
return __builtin_memcmp(&x, &y, sizeof(x)) == 0;
}
int rab(void)
{
char a[16] = {}, b[16] = {};
a[15] = 1; b[15] = 4;
barrier_data(a);
return __builtin_memcmp(a, b, sizeof(a)) == 0;
}
int main(void)
{
unsigned long a = ~0, b = 123456789;
barrier_data(&a);
return __builtin_memcmp(&a, &b, sizeof(a)) == 0;
}
# clang -O2 -Wall test-x86.c -S -o -
.text
.file "test-x86.c"
.globl foo
.align 16, 0x90
.type foo, at function
foo: # @foo
.cfi_startproc
# BB#0:
movabsq $8589934593, %rax # imm = 0x200000001
movq %rax, -8(%rsp)
leaq -8(%rsp), %rcx
#APP
#NO_APP
cmpq %rax, -8(%rsp)
sete %al
movzbl %al, %eax
retq
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
.cfi_endproc
.globl rab
.align 16, 0x90
.type rab, at function
rab: # @rab
.cfi_startproc
# BB#0:
subq $40, %rsp
.Ltmp0:
.cfi_def_cfa_offset 48
xorps %xmm0, %xmm0
movaps %xmm0, 16(%rsp)
movaps %xmm0, (%rsp)
movb $1, 31(%rsp)
movb $4, 15(%rsp)
leaq 16(%rsp), %rdi
#APP
#NO_APP
leaq (%rsp), %rsi
movl $16, %edx
callq memcmp
testl %eax, %eax
sete %al
movzbl %al, %eax
addq $40, %rsp
retq
.Lfunc_end1:
.size rab, .Lfunc_end1-rab
.cfi_endproc
.globl main
.align 16, 0x90
.type main, at function
main: # @main
.cfi_startproc
# BB#0:
movq $-1, -8(%rsp)
leaq -8(%rsp), %rax
#APP
#NO_APP
cmpq $123456789, -8(%rsp) # imm = 0x75BCD15
sete %al
movzbl %al, %eax
retq
.Lfunc_end2:
.size main, .Lfunc_end2-main
.cfi_endproc
.type .Lfoo.y, at object # @foo.y
.section .rodata.cst8,"aM", at progbits,8
.align 4
.Lfoo.y:
.long 1 # 0x1
.long 2 # 0x2
.size .Lfoo.y, 8
.ident "clang version 3.8.0 (http://llvm.org/git/clang.git
77c006d1e83a7053e170777f572b10268fe99c69) (http://llvm.org/git/llvm.git
461ac91112c31d52c93bc9c5bd47d33df4c842a0)"
.section ".note.GNU-stack","", at progbits
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160120/10df8102/attachment.html>
More information about the llvm-bugs
mailing list