[llvm-bugs] [Bug 26218] New: BPF target: always inline __builtin_memcmp and friends

Wed Jan 20 02:54:58 PST 2016

https://llvm.org/bugs/show_bug.cgi?id=26218

            Bug ID: 26218
           Summary: BPF target: always inline __builtin_memcmp and friends
           Product: new-bugs
           Version: unspecified
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: daniel at iogearbox.net
                CC: llvm-bugs at lists.llvm.org
    Classification: Unclassified

There seem to be different heuristics with regard to when __builtin_memcmp()
gets inlined for specific targets. Below is a comparison of similar code for
BPF / x86 with disassembly attached.

With the BPF target, bar() gets inlined/optimized, while foo() and rab() each
emit an external call to memcmp(). With the x86 target, foo() and main() (the
counterpart of bar() in the BPF test) get inlined, while rab() doesn't.

Obviously, when __builtin_memcmp() doesn't get inlined for the BPF target, the
program cannot be loaded into the kernel: the external memcmp() call results
in call 0, which is not permitted and is thus rejected by the verifier.

It would be useful for the BPF target to always inline __builtin_memcmp()
invocations that have a compile-time known size (and, if possible, to raise an
error when the size cannot be determined at compile time), so that eBPF insns
are always emitted for the builtin function.

That way, applications don't have to reimplement this over and over, and could
just use the provided compiler builtins. It might also be useful to do the
same for other builtins such as __builtin_memset / __builtin_memcpy / etc. for
the BPF target.

**** BPF target:

#define __section(N) __attribute__((section(N), used))

__section("p1")
int foo(void)
{
        struct bar {
                unsigned int a;
                unsigned int b;
        } x = { .a = 1, .b = 2 }, y = { .a = 1, .b = 2 };
        return __builtin_memcmp(&x, &y, sizeof(x)) == 0;
}

__section("p2")
int bar(void)
{
        unsigned long a = ~0, b = 123456789;
        return __builtin_memcmp(&a, &b, sizeof(a)) == 0;
}

__section("p3")
int rab(void)
{
        char a[16] = {}, b[16] = {};
        a[15] = 1; b[15] = 4;
        return __builtin_memcmp(a, b, sizeof(a)) == 0;
}

# clang -O2 -Wall -target bpf test-bpf.c -S -o -
        .text
        .section   p1,"ax",@progbits
        .globl     foo
        .align     8
foo:                                    # @foo
# BB#0:
        ld_64      r1, <MCOperand Expr:(.Lfoo.y)>
        ld_64      r2, <MCOperand Expr:(.Lfoo.y)>
        mov        r3, 8
        call       memcmp
        mov        r1, r0
        slli       r1, 32
        srli       r1, 32
        mov        r0, 1
        mov        r2, 0
        jeq        r1, r2 goto LBB0_2
# BB#1:
        mov        r0, 0
LBB0_2:
        ret

        .section   p2,"ax",@progbits
        .globl     bar
        .align     8
bar:                                    # @bar
# BB#0:
        mov        r0, 0
        ret

        .section   p3,"ax",@progbits
        .globl     rab
        .align     8
rab:                                    # @rab
# BB#0:
        mov        r7, 0
        stb        -2(r10), r7
        sth        -4(r10), r7
        stw        -8(r10), r7
        std        -16(r10), r7
        stb        -18(r10), r7
        sth        -20(r10), r7
        stw        -24(r10), r7
        std        -32(r10), r7
        mov        r6, 1
        stb        -1(r10), r6
        mov        r1, 4
        stb        -17(r10), r1
        mov        r1, r10
        addi       r1, -16
        mov        r2, r10
        addi       r2, -32
        mov        r3, 16
        call       memcmp
        slli       r0, 32
        srli       r0, 32
        jeq        r0, r7 goto LBB2_2
# BB#1:
        mov        r6, 0
LBB2_2:
        mov        r0, r6
        ret

        .section   .rodata.cst8,"aM",@progbits,8
        .align     4                       # @foo.y
.Lfoo.y:
        .long      1                       # 0x1
        .long      2                       # 0x2

**** X86 target for comparison:

#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")

int foo(void)
{
        struct bar {
                unsigned int a;
                unsigned int b;
        } x = { .a = 1, .b = 2 }, y = { .a = 1, .b = 2 };
        barrier_data(&x);
        return __builtin_memcmp(&x, &y, sizeof(x)) == 0;
}

int rab(void)
{
        char a[16] = {}, b[16] = {};
        a[15] = 1; b[15] = 4;
        barrier_data(a);
        return __builtin_memcmp(a, b, sizeof(a)) == 0;
}

int main(void)
{
        unsigned long a = ~0, b = 123456789;
        barrier_data(&a);
        return __builtin_memcmp(&a, &b, sizeof(a)) == 0;
}

# clang -O2 -Wall test-x86.c -S -o -
    .text
    .file    "test-x86.c"
    .globl    foo
    .align    16, 0x90
    .type    foo,@function
foo:                                    # @foo
    .cfi_startproc
# BB#0:
    movabsq    $8589934593, %rax       # imm = 0x200000001
    movq    %rax, -8(%rsp)
    leaq    -8(%rsp), %rcx
    #APP
    #NO_APP
    cmpq    %rax, -8(%rsp)
    sete    %al
    movzbl    %al, %eax
    retq
.Lfunc_end0:
    .size    foo, .Lfunc_end0-foo
    .cfi_endproc

    .globl    rab
    .align    16, 0x90
    .type    rab,@function
rab:                                    # @rab
    .cfi_startproc
# BB#0:
    subq    $40, %rsp
.Ltmp0:
    .cfi_def_cfa_offset 48
    xorps    %xmm0, %xmm0
    movaps    %xmm0, 16(%rsp)
    movaps    %xmm0, (%rsp)
    movb    $1, 31(%rsp)
    movb    $4, 15(%rsp)
    leaq    16(%rsp), %rdi
    #APP
    #NO_APP
    leaq    (%rsp), %rsi
    movl    $16, %edx
    callq    memcmp
    testl    %eax, %eax
    sete    %al
    movzbl    %al, %eax
    addq    $40, %rsp
    retq
.Lfunc_end1:
    .size    rab, .Lfunc_end1-rab
    .cfi_endproc

    .globl    main
    .align    16, 0x90
    .type    main,@function
main:                                   # @main
    .cfi_startproc
# BB#0:
    movq    $-1, -8(%rsp)
    leaq    -8(%rsp), %rax
    #APP
    #NO_APP
    cmpq    $123456789, -8(%rsp)    # imm = 0x75BCD15
    sete    %al
    movzbl    %al, %eax
    retq
.Lfunc_end2:
    .size    main, .Lfunc_end2-main
    .cfi_endproc

    .type    .Lfoo.y,@object         # @foo.y
    .section    .rodata.cst8,"aM",@progbits,8
    .align    4
.Lfoo.y:
    .long    1                       # 0x1
    .long    2                       # 0x2
    .size    .Lfoo.y, 8

    .ident    "clang version 3.8.0 (http://llvm.org/git/clang.git
77c006d1e83a7053e170777f572b10268fe99c69) (http://llvm.org/git/llvm.git
461ac91112c31d52c93bc9c5bd47d33df4c842a0)"
    .section    ".note.GNU-stack","",@progbits

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160120/10df8102/attachment.html>