<html>

    <head>

      <base href="https://llvm.org/bugs/" />

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - BPF target: always inline __builtin_memcmp and friends"

   href="https://llvm.org/bugs/show_bug.cgi?id=26218">26218</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>BPF target: always inline __builtin_memcmp and friends

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>new-bugs

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>unspecified

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>new bugs

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>daniel@iogearbox.net

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr>

        <tr>

          <th>Classification</th>

          <td>Unclassified

          </td>

        </tr></table>

      <p>

        <div>

        <pre>There seem to be different heuristics with regard to when __builtin_memcmp()

gets inlined for specific targets. Below is a comparison of similar code for

BPF / x86 with disassembly attached.

In BPF, bar() gets inlined/optimized while the foo() and rab() functions will emit

an external call to memcmp(). In the x86 target, foo() and main() get inlined

while rab() doesn't.

Obviously, when __builtin_memcmp() doesn't get inlined for BPF target, then the

program cannot be loaded into the kernel. It will result in call 0, which is

not permitted and thus rejected by the verifier.

It would be useful for the BPF target to always inline __builtin_memcmp()

invocations that have a compile-time known size (and perhaps throw an error if

size cannot be determined at compilation time if that's possible), so that eBPF

insns are always emitted for the builtin function.

That way, applications don't have to reimplement this over and over, and could

just use the provided compiler builtins. It might also be useful to do

the same for other builtins like __builtin_memset / __builtin_memcpy / etc. for

the BPF target.

**** BPF target:

#define __section(N) __attribute__((section(N), used))

__section("p1")

int foo(void)

{

        struct bar {

                unsigned int a;

                unsigned int b;

        } x = { .a = 1, .b = 2 }, y = { .a = 1, .b = 2 };

        return __builtin_memcmp(&x, &y, sizeof(x)) == 0;

}

__section("p2")

int bar(void)

{

        unsigned long a = ~0, b = 123456789;

        return __builtin_memcmp(&a, &b, sizeof(a)) == 0;

}

__section("p3")

int rab(void)

{

        char a[16] = {}, b[16] = {};

        a[15] = 1; b[15] = 4;

        return __builtin_memcmp(a, b, sizeof(a)) == 0;

}

# clang -O2 -Wall -target bpf test-bpf.c -S -o -

        .text

        .section   p1,"ax",@progbits

        .globl     foo

        .align     8

foo:                                    # @foo

# BB#0:

        ld_64      r1, &lt;MCOperand Expr:(.Lfoo.y)&gt;

        ld_64      r2, &lt;MCOperand Expr:(.Lfoo.y)&gt;

        mov        r3, 8

        call       memcmp

        mov        r1, r0

        slli       r1, 32

        srli       r1, 32

        mov        r0, 1

        mov        r2, 0

        jeq        r1, r2 goto LBB0_2

# BB#1:

        mov        r0, 0

LBB0_2:

        ret

        .section   p2,"ax",@progbits

        .globl     bar

        .align     8

bar:                                    # @bar

# BB#0:

        mov        r0, 0

        ret

        .section   p3,"ax",@progbits

        .globl     rab

        .align     8

rab:                                    # @rab

# BB#0:

        mov        r7, 0

        stb        -2(r10), r7

        sth        -4(r10), r7

        stw        -8(r10), r7

        std        -16(r10), r7

        stb        -18(r10), r7

        sth        -20(r10), r7

        stw        -24(r10), r7

        std        -32(r10), r7

        mov        r6, 1

        stb        -1(r10), r6

        mov        r1, 4

        stb        -17(r10), r1

        mov        r1, r10

        addi       r1, -16

        mov        r2, r10

        addi       r2, -32

        mov        r3, 16

        call       memcmp

        slli       r0, 32

        srli       r0, 32

        jeq        r0, r7 goto LBB2_2

# BB#1:

        mov        r6, 0

LBB2_2:

        mov        r0, r6

        ret

        .section   .rodata.cst8,"aM",@progbits,8

        .align     4                       # @foo.y

.Lfoo.y:

        .long      1                       # 0x1

        .long      2                       # 0x2

**** X86 target for comparison:

#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")

int foo(void)

{

        struct bar {

                unsigned int a;

                unsigned int b;

        } x = { .a = 1, .b = 2 }, y = { .a = 1, .b = 2 };

        barrier_data(&x);

        return __builtin_memcmp(&x, &y, sizeof(x)) == 0;

}

int rab(void)

{

        char a[16] = {}, b[16] = {};

        a[15] = 1; b[15] = 4;

        barrier_data(a);

        return __builtin_memcmp(a, b, sizeof(a)) == 0;

}

int main(void)

{

        unsigned long a = ~0, b = 123456789;

        barrier_data(&a);

        return __builtin_memcmp(&a, &b, sizeof(a)) == 0;

}

# clang -O2 -Wall test-x86.c -S -o -

    .text

    .file    "test-x86.c"

    .globl    foo

    .align    16, 0x90

    .type    foo,@function

foo:                                    # @foo

    .cfi_startproc

# BB#0:

    movabsq    $8589934593, %rax       # imm = 0x200000001

    movq    %rax, -8(%rsp)

    leaq    -8(%rsp), %rcx

    #APP

    #NO_APP

    cmpq    %rax, -8(%rsp)

    sete    %al

    movzbl    %al, %eax

    retq

.Lfunc_end0:

    .size    foo, .Lfunc_end0-foo

    .cfi_endproc

    .globl    rab

    .align    16, 0x90

    .type    rab,@function

rab:                                    # @rab

    .cfi_startproc

# BB#0:

    subq    $40, %rsp

.Ltmp0:

    .cfi_def_cfa_offset 48

    xorps    %xmm0, %xmm0

    movaps    %xmm0, 16(%rsp)

    movaps    %xmm0, (%rsp)

    movb    $1, 31(%rsp)

    movb    $4, 15(%rsp)

    leaq    16(%rsp), %rdi

    #APP

    #NO_APP

    leaq    (%rsp), %rsi

    movl    $16, %edx

    callq    memcmp

    testl    %eax, %eax

    sete    %al

    movzbl    %al, %eax

    addq    $40, %rsp

    retq

.Lfunc_end1:

    .size    rab, .Lfunc_end1-rab

    .cfi_endproc

    .globl    main

    .align    16, 0x90

    .type    main,@function

main:                                   # @main

    .cfi_startproc

# BB#0:

    movq    $-1, -8(%rsp)

    leaq    -8(%rsp), %rax

    #APP

    #NO_APP

    cmpq    $123456789, -8(%rsp)    # imm = 0x75BCD15

    sete    %al

    movzbl    %al, %eax

    retq

.Lfunc_end2:

    .size    main, .Lfunc_end2-main

    .cfi_endproc

    .type    .Lfoo.y,@object         # @foo.y

    .section    .rodata.cst8,"aM",@progbits,8

    .align    4

.Lfoo.y:

    .long    1                       # 0x1

    .long    2                       # 0x2

    .size    .Lfoo.y, 8

    .ident    "clang version 3.8.0 (<a href="http://llvm.org/git/clang.git">http://llvm.org/git/clang.git</a>

77c006d1e83a7053e170777f572b10268fe99c69) (<a href="http://llvm.org/git/llvm.git">http://llvm.org/git/llvm.git</a>

461ac91112c31d52c93bc9c5bd47d33df4c842a0)"

    .section    ".note.GNU-stack","",@progbits</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>