<html>
<head>
<base href="https://llvm.org/bugs/" />
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW --- - BPF target: always inline __builtin_memcmp and friends"
href="https://llvm.org/bugs/show_bug.cgi?id=26218">26218</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>BPF target: always inline __builtin_memcmp and friends
</td>
</tr>
<tr>
<th>Product</th>
<td>new-bugs
</td>
</tr>
<tr>
<th>Version</th>
<td>unspecified
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>new bugs
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>daniel@iogearbox.net
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr>
<tr>
<th>Classification</th>
<td>Unclassified
</td>
</tr></table>
<p>
<div>
<pre>There seem to be different heuristics with regard to when __builtin_memcmp()
gets inlined for specific targets. Below is a comparison of similar code for
BPF / x86 with disassembly attached.
In BPF, bar() gets inlined/optimized while the foo() and rab() functions will emit
an external call to memcmp(). For the x86 target, foo() and main() get inlined
while rab() doesn't.
Obviously, when __builtin_memcmp() doesn't get inlined for BPF target, then the
program cannot be loaded into the kernel. It will result in call 0, which is
not permitted and thus rejected by the verifier.
It would be useful for the BPF target to always inline __builtin_memcmp()
invocations that have a compile-time known size (and perhaps throw an error if
size cannot be determined at compilation time if that's possible), so that eBPF
insns are always emitted for the builtin function.
That way, applications don't have to reimplement this over and over, and could
just use the provided compiler builtins. It might also be useful to do
the same for other builtins like __builtin_memset / __builtin_memcpy / etc for
BPF target.
**** BPF target:
#define __section(N) __attribute__((section(N), used))
__section("p1")
int foo(void)
{
struct bar {
unsigned int a;
unsigned int b;
} x = { .a = 1, .b = 2 }, y = { .a = 1, .b = 2 };
return __builtin_memcmp(&x, &y, sizeof(x)) == 0;
}
__section("p2")
int bar(void)
{
unsigned long a = ~0, b = 123456789;
return __builtin_memcmp(&a, &b, sizeof(a)) == 0;
}
__section("p3")
int rab(void)
{
char a[16] = {}, b[16] = {};
a[15] = 1; b[15] = 4;
return __builtin_memcmp(a, b, sizeof(a)) == 0;
}
# clang -O2 -Wall -target bpf test-bpf.c -S -o -
.text
.section p1,"ax",@progbits
.globl foo
.align 8
foo: # @foo
# BB#0:
ld_64 r1, &lt;MCOperand Expr:(.Lfoo.y)&gt;
ld_64 r2, &lt;MCOperand Expr:(.Lfoo.y)&gt;
mov r3, 8
call memcmp
mov r1, r0
slli r1, 32
srli r1, 32
mov r0, 1
mov r2, 0
jeq r1, r2 goto LBB0_2
# BB#1:
mov r0, 0
LBB0_2:
ret
.section p2,"ax",@progbits
.globl bar
.align 8
bar: # @bar
# BB#0:
mov r0, 0
ret
.section p3,"ax",@progbits
.globl rab
.align 8
rab: # @rab
# BB#0:
mov r7, 0
stb -2(r10), r7
sth -4(r10), r7
stw -8(r10), r7
std -16(r10), r7
stb -18(r10), r7
sth -20(r10), r7
stw -24(r10), r7
std -32(r10), r7
mov r6, 1
stb -1(r10), r6
mov r1, 4
stb -17(r10), r1
mov r1, r10
addi r1, -16
mov r2, r10
addi r2, -32
mov r3, 16
call memcmp
slli r0, 32
srli r0, 32
jeq r0, r7 goto LBB2_2
# BB#1:
mov r6, 0
LBB2_2:
mov r0, r6
ret
.section .rodata.cst8,"aM",@progbits,8
.align 4 # @foo.y
.Lfoo.y:
.long 1 # 0x1
.long 2 # 0x2
**** X86 target for comparison:
#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
int foo(void)
{
struct bar {
unsigned int a;
unsigned int b;
} x = { .a = 1, .b = 2 }, y = { .a = 1, .b = 2 };
barrier_data(&x);
return __builtin_memcmp(&x, &y, sizeof(x)) == 0;
}
int rab(void)
{
char a[16] = {}, b[16] = {};
a[15] = 1; b[15] = 4;
barrier_data(a);
return __builtin_memcmp(a, b, sizeof(a)) == 0;
}
int main(void)
{
unsigned long a = ~0, b = 123456789;
barrier_data(&a);
return __builtin_memcmp(&a, &b, sizeof(a)) == 0;
}
# clang -O2 -Wall test-x86.c -S -o -
.text
.file "test-x86.c"
.globl foo
.align 16, 0x90
.type foo,@function
foo: # @foo
.cfi_startproc
# BB#0:
movabsq $8589934593, %rax # imm = 0x200000001
movq %rax, -8(%rsp)
leaq -8(%rsp), %rcx
#APP
#NO_APP
cmpq %rax, -8(%rsp)
sete %al
movzbl %al, %eax
retq
.Lfunc_end0:
.size foo, .Lfunc_end0-foo
.cfi_endproc
.globl rab
.align 16, 0x90
.type rab,@function
rab: # @rab
.cfi_startproc
# BB#0:
subq $40, %rsp
.Ltmp0:
.cfi_def_cfa_offset 48
xorps %xmm0, %xmm0
movaps %xmm0, 16(%rsp)
movaps %xmm0, (%rsp)
movb $1, 31(%rsp)
movb $4, 15(%rsp)
leaq 16(%rsp), %rdi
#APP
#NO_APP
leaq (%rsp), %rsi
movl $16, %edx
callq memcmp
testl %eax, %eax
sete %al
movzbl %al, %eax
addq $40, %rsp
retq
.Lfunc_end1:
.size rab, .Lfunc_end1-rab
.cfi_endproc
.globl main
.align 16, 0x90
.type main,@function
main: # @main
.cfi_startproc
# BB#0:
movq $-1, -8(%rsp)
leaq -8(%rsp), %rax
#APP
#NO_APP
cmpq $123456789, -8(%rsp) # imm = 0x75BCD15
sete %al
movzbl %al, %eax
retq
.Lfunc_end2:
.size main, .Lfunc_end2-main
.cfi_endproc
.type .Lfoo.y,@object # @foo.y
.section .rodata.cst8,"aM",@progbits,8
.align 4
.Lfoo.y:
.long 1 # 0x1
.long 2 # 0x2
.size .Lfoo.y, 8
.ident "clang version 3.8.0 (<a href="http://llvm.org/git/clang.git">http://llvm.org/git/clang.git</a>
77c006d1e83a7053e170777f572b10268fe99c69) (<a href="http://llvm.org/git/llvm.git">http://llvm.org/git/llvm.git</a>
461ac91112c31d52c93bc9c5bd47d33df4c842a0)"
.section ".note.GNU-stack","",@progbits</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>