[llvm] [RFC][BPF] Support Jump Table (PR #133856)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 31 21:48:00 PDT 2025


https://github.com/yonghong-song created https://github.com/llvm/llvm-project/pull/133856

NOTE: We probably need cpu v5 or other flags to enable this feature. We can add it later when necessary.

This patch adds jump table support. A new insn 'gotox <reg>' is added to allow goto through a register. The register represents the address in the current section. The following is a concrete example using the bpf selftest progs/user_ringbuf_success.c.

Compilation command line to generate .s file:
=============================================
```
clang  -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
    -I/home/yhs/work/bpf-next/tools/include/uapi \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -std=gnu11 \
    -fno-strict-aliasing -Wno-compare-distinct-pointer-types \
    -idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
    -idirafter /usr/local/include -idirafter /usr/include \
    -DENABLE_ATOMICS_TESTS   -O2 -S progs/user_ringbuf_success.c \
    -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o.s \
    --target=bpf -mcpu=v3
```
The related assembly:
```
  read_protocol_msg:
        ...
        r3 <<= 3
        r1 = .LJTI1_0 ll
        r1 += r3
        r1 = *(u64 *)(r1 + 0)
        gotox r1
  LBB1_4:
        r1 = *(u64 *)(r0 + 8)
        goto LBB1_5
  LBB1_7:
        r1 = *(u64 *)(r0 + 8)
        goto LBB1_8
  LBB1_9:
        w1 = *(u32 *)(r0 + 8)
        r1 <<= 32
        r1 s>>= 32
        r2 = kern_mutated ll
        r3 = *(u64 *)(r2 + 0)
        r3 *= r1
        *(u64 *)(r2 + 0) = r3
        goto LBB1_11
  LBB1_6:
        w1 = *(u32 *)(r0 + 8)
        r1 <<= 32
        r1 s>>= 32
  LBB1_5:
  ...
        .section        .rodata,"a",@progbits
        .p2align        3, 0x0
  .LJTI1_0:
        .quad   LBB1_4
        .quad   LBB1_6
        .quad   LBB1_7
        .quad   LBB1_9
  ...
  publish_next_kern_msg:
        ...
        r6 <<= 3
        r1 = .LJTI6_0 ll
        r1 += r6
        r1 = *(u64 *)(r1 + 0)
        gotox r1
  LBB6_3:
        ...
  LBB6_5:
        ...
  LBB6_6:
        ...
  LBB6_4:
        ...
        .section        .rodata,"a",@progbits
        .p2align        3, 0x0
.LJTI6_0:
        .quad   LBB6_3
        .quad   LBB6_4
        .quad   LBB6_5
        .quad   LBB6_6
```
Now let us look at .o file
==========================
```
clang  -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
    -I/home/yhs/work/bpf-next/tools/include/uapi \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include \
    -std=gnu11 -fno-strict-aliasing -Wno-compare-distinct-pointer-types \
    -idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
    -idirafter /usr/local/include -idirafter /usr/include -DENABLE_ATOMICS_TESTS \
    -O2 -c progs/user_ringbuf_success.c \
    -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o \
    --target=bpf -mcpu=v3
```
In obj file, all .rodata sections are merged together. So we have
```
    $ llvm-readelf -x '.rodata' user_ringbuf_success.bpf.o
    Hex dump of section '.rodata':
    0x00000000 a8020000 00000000 10030000 00000000 ................
    0x00000010 b8020000 00000000 c8020000 00000000 ................
    0x00000020 40040000 00000000 18050000 00000000 @...............
    0x00000030 88040000 00000000 d0040000 00000000 ................
    0x00000040 44726169 6e207265 7475726e 65643a20 Drain returned:
    0x00000050 256c640a 00556e65 78706563 7465646c %ld..Unexpectedl
    0x00000060 79206661 696c6564 20746f20 67657420 y failed to get
    0x00000070 6d73670a 00556e72 65636f67 6e697a65 msg..Unrecognize
    0x00000080 64206f70 2025640a 00256c75 20213d20 d op %d..%lu !=
    0x00000090 256c750a 00627066 5f64796e 7074725f %lu..bpf_dynptr_
    0x000000a0 72656164 28292066 61696c65 643a2025 read() failed: %
    0x000000b0 640a0055 6e657870 65637465 646c7920 d..Unexpectedly
    0x000000c0 6661696c 65642074 6f206765 74207361 failed to get sa
    0x000000d0 6d706c65 0a00                       mple..
```
Let us look at the insns. Some annotations explain the details.
```
    $ llvm-objdump -Sr user_ringbuf_success.bpf.o
    ....
    Disassembly of section .text:
    0000000000000000 <read_protocol_msg>:
    ;       msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg));
       0:       b4 02 00 00 00 00 00 00 w2 = 0x0
       1:       b4 03 00 00 10 00 00 00 w3 = 0x10
       2:       85 00 00 00 cb 00 00 00 call 0xcb
    ...
    0000000000000268 <handle_sample_msg>:
    ;       switch (msg->msg_op) {
      77:       61 13 00 00 00 00 00 00 w3 = *(u32 *)(r1 + 0x0)
      78:       26 03 1c 00 03 00 00 00 if w3 > 0x3 goto +0x1c <handle_sample_msg+0xf0>
      79:       67 03 00 00 03 00 00 00 r3 <<= 0x3
      80:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                0000000000000280:  R_BPF_64_64  .rodata
<=== r2 will be the address of .rodata with offset 0.
<=== look at the first 32 bytes of .rodata:
    0x00000000 a8020000 00000000 10030000 00000000 ................
    0x00000010 b8020000 00000000 c8020000 00000000 ................
The four actual addresses are
    0x2a8: insn idx 0x2a8/8 = 85
    0x310: insn idx 0x310/8 = 98
    0x2b8: insn idx 0x2b8/8 = 87
    0x2c8: insn idx 0x2c8/8 = 89

      82:       0f 32 00 00 00 00 00 00 r2 += r3
      83:       79 22 00 00 00 00 00 00 r2 = *(u64 *)(r2 + 0x0)
      84:       0d 02 00 00 00 00 00 00 gotox r2
<=== So eventually gotox will go to the insn idx in this section.
    ;               kern_mutated += msg->operand_64;
      85:       79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
      86:       05 00 0e 00 00 00 00 00 goto +0xe <handle_sample_msg+0xc0>
    ;               kern_mutated *= msg->operand_64;
      87:       79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
      88:       05 00 03 00 00 00 00 00 goto +0x3 <handle_sample_msg+0x78>
    ;               kern_mutated *= msg->operand_32;
      89:       61 11 08 00 00 00 00 00 w1 = *(u32 *)(r1 + 0x8)
      90:       67 01 00 00 20 00 00 00 r1 <<= 0x20
      91:       c7 01 00 00 20 00 00 00 r1 s>>= 0x20
      92:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
    ...
    00000000000003a0 <publish_next_kern_msg>:
    ; {
     116:       bc 16 00 00 00 00 00 00 w6 = w1
    ;       msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0);
     117:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                00000000000003a8:  R_BPF_64_64  kernel_ringbuf
     119:       b7 02 00 00 10 00 00 00 r2 = 0x10
     120:       b7 03 00 00 00 00 00 00 r3 = 0x0
     121:       85 00 00 00 83 00 00 00 call 0x83
    ;       if (!msg) {
     122:       55 00 06 00 00 00 00 00 if r0 != 0x0 goto +0x6 <publish_next_kern_msg+0x68>
    ;               err = 4;
     123:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                00000000000003d8:  R_BPF_64_64  err
     125:       b4 02 00 00 04 00 00 00 w2 = 0x4
     126:       63 21 00 00 00 00 00 00 *(u32 *)(r1 + 0x0) = w2
     127:       b4 00 00 00 01 00 00 00 w0 = 0x1
    ;               return 1;
     128:       05 00 31 00 00 00 00 00 goto +0x31 <publish_next_kern_msg+0x1f0>
    ;       switch (index % TEST_MSG_OP_NUM_OPS) {
     129:       54 06 00 00 03 00 00 00 w6 &= 0x3
     130:       67 06 00 00 03 00 00 00 r6 <<= 0x3
     131:       18 01 00 00 20 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x20 ll
                0000000000000418:  R_BPF_64_64  .rodata
<=== r1 will be the address of .rodata with offset 0x20.
<=== look at the 32 bytes of .rodata starting at offset 0x20:
    0x00000020 40040000 00000000 18050000 00000000 @...............
    0x00000030 88040000 00000000 d0040000 00000000 ................
The four actual addresses are
    0x440: insn idx 0x440/8 = 136
    0x518: insn idx 0x518/8 = 163
    0x488: insn idx 0x488/8 = 145
    0x4d0: insn idx 0x4d0/8 = 154
     133:       0f 61 00 00 00 00 00 00 r1 += r6
     134:       79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
     135:       0d 01 00 00 00 00 00 00 gotox r1
<=== So eventually gotox will go to the insn idx in this section.
     136:       b4 01 00 00 00 00 00 00 w1 = 0x0
    ;               msg->msg_op = TEST_MSG_OP_INC64;
     137:       63 10 00 00 00 00 00 00 *(u32 *)(r0 + 0x0) = w1
     138:       b7 01 00 00 04 00 00 00 r1 = 0x4
    ;               msg->operand_64 = operand_64;
     139:       7b 10 08 00 00 00 00 00 *(u64 *)(r0 + 0x8) = r1
    ;               expected_user_mutated += operand_64;
     140:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                0000000000000460:  R_BPF_64_64  expected_user_mutated
     142:       79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
     143:       07 01 00 00 04 00 00 00 r1 += 0x4
    ;               break;
     144:       05 00 1a 00 00 00 00 00 goto +0x1a <publish_next_kern_msg+0x1b8>
     145:       b4 01 00 00 02 00 00 00 w1 = 0x2
    ;               msg->msg_op = TEST_MSG_OP_MUL64;
    ...
```
There are a few things worth discussing.
First, in the above, it is hard to find the jump table size for a particular relocation ('R_BPF_64_64  .rodata + <offset>'). One approach is to scan through the whole ELF file and collect all '.rodata + <offset>' relocations. For example, here we have
```
   .rodata + 0
   .rodata + 0x20
   .rodata + 0x40
   .rodata + 0x55
   .rodata + 0x75
   .rodata + 0x89
   .rodata + 0x95
   .rodata + 0xb3
```
With the above information, the size for each sub-rodata can be found easily.

Second, the current gotox insn target is the instruction offset from the beginning of the section. This is different from goto/gotol, where the target is relative to the current insn. Note that x86 uses an offset relative to the %rip register, as shown below:
```
      1c: 48 8d 0d 00 00 00 00          leaq    (%rip), %rcx            # 0x23 <read_protocol_msg+0x23>
                000000000000001f:  R_X86_64_PC32        .rodata-0x4
      ;       switch (msg->msg_op) {
      23: 48 63 14 91                   movslq  (%rcx,%rdx,4), %rdx
      27: 48 01 ca                      addq    %rcx, %rdx
      2a: ff e2                         jmpq    *%rdx
```
I could explore using pc-relative offsets for the .rodata contents, but since we
do not have %rip, I am not sure how difficult that would be.

>From 6aa1350936feb6ab67ed11b2bfb28003b4ddfa97 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Mon, 31 Mar 2025 21:25:26 -0700
Subject: [PATCH] [RFC][BPF] Support Jump Table

NOTE: We probably need cpu v5 or other flags to enable this feature.
We can add it later when necessary.

This patch adds jump table support. A new insn 'gotox <reg>' is
added to allow goto through a register. The register represents
the address in the current section. The following is a concrete
example using the bpf selftest progs/user_ringbuf_success.c.

Compilation command line to generate .s file:
=============================================
clang  -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
    -I/home/yhs/work/bpf-next/tools/include/uapi \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -std=gnu11 \
    -fno-strict-aliasing -Wno-compare-distinct-pointer-types \
    -idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
    -idirafter /usr/local/include -idirafter /usr/include \
    -DENABLE_ATOMICS_TESTS   -O2 -S progs/user_ringbuf_success.c \
    -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o.s \
    --target=bpf -mcpu=v3

The related assembly:
  read_protocol_msg:
        ...
        r3 <<= 3
        r1 = .LJTI1_0 ll
        r1 += r3
        r1 = *(u64 *)(r1 + 0)
        gotox r1
  LBB1_4:
        r1 = *(u64 *)(r0 + 8)
        goto LBB1_5
  LBB1_7:
        r1 = *(u64 *)(r0 + 8)
        goto LBB1_8
  LBB1_9:
        w1 = *(u32 *)(r0 + 8)
        r1 <<= 32
        r1 s>>= 32
        r2 = kern_mutated ll
        r3 = *(u64 *)(r2 + 0)
        r3 *= r1
        *(u64 *)(r2 + 0) = r3
        goto LBB1_11
  LBB1_6:
        w1 = *(u32 *)(r0 + 8)
        r1 <<= 32
        r1 s>>= 32
  LBB1_5:
  ...
        .section        .rodata,"a",@progbits
        .p2align        3, 0x0
  .LJTI1_0:
        .quad   LBB1_4
        .quad   LBB1_6
        .quad   LBB1_7
        .quad   LBB1_9
  ...
  publish_next_kern_msg:
        ...
        r6 <<= 3
        r1 = .LJTI6_0 ll
        r1 += r6
        r1 = *(u64 *)(r1 + 0)
        gotox r1
  LBB6_3:
        ...
  LBB6_5:
        ...
  LBB6_6:
        ...
  LBB6_4:
        ...
        .section        .rodata,"a",@progbits
        .p2align        3, 0x0
.LJTI6_0:
        .quad   LBB6_3
        .quad   LBB6_4
        .quad   LBB6_5
        .quad   LBB6_6

Now let us look at .o file
==========================
clang  -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
    -I/home/yhs/work/bpf-next/tools/include/uapi \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include \
    -std=gnu11 -fno-strict-aliasing -Wno-compare-distinct-pointer-types \
    -idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
    -idirafter /usr/local/include -idirafter /usr/include -DENABLE_ATOMICS_TESTS \
    -O2 -c progs/user_ringbuf_success.c \
    -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o \
    --target=bpf -mcpu=v3

In obj file, all .rodata sections are merged together. So we have
    $ llvm-readelf -x '.rodata' user_ringbuf_success.bpf.o
    Hex dump of section '.rodata':
    0x00000000 a8020000 00000000 10030000 00000000 ................
    0x00000010 b8020000 00000000 c8020000 00000000 ................
    0x00000020 40040000 00000000 18050000 00000000 @...............
    0x00000030 88040000 00000000 d0040000 00000000 ................
    0x00000040 44726169 6e207265 7475726e 65643a20 Drain returned:
    0x00000050 256c640a 00556e65 78706563 7465646c %ld..Unexpectedl
    0x00000060 79206661 696c6564 20746f20 67657420 y failed to get
    0x00000070 6d73670a 00556e72 65636f67 6e697a65 msg..Unrecognize
    0x00000080 64206f70 2025640a 00256c75 20213d20 d op %d..%lu !=
    0x00000090 256c750a 00627066 5f64796e 7074725f %lu..bpf_dynptr_
    0x000000a0 72656164 28292066 61696c65 643a2025 read() failed: %
    0x000000b0 640a0055 6e657870 65637465 646c7920 d..Unexpectedly
    0x000000c0 6661696c 65642074 6f206765 74207361 failed to get sa
    0x000000d0 6d706c65 0a00                       mple..

Let us look at the insns. Some annotations explain the details.
    $ llvm-objdump -Sr user_ringbuf_success.bpf.o
    ....
    Disassembly of section .text:
    0000000000000000 <read_protocol_msg>:
    ;       msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg));
       0:       b4 02 00 00 00 00 00 00 w2 = 0x0
       1:       b4 03 00 00 10 00 00 00 w3 = 0x10
       2:       85 00 00 00 cb 00 00 00 call 0xcb
    ...
    0000000000000268 <handle_sample_msg>:
    ;       switch (msg->msg_op) {
      77:       61 13 00 00 00 00 00 00 w3 = *(u32 *)(r1 + 0x0)
      78:       26 03 1c 00 03 00 00 00 if w3 > 0x3 goto +0x1c <handle_sample_msg+0xf0>
      79:       67 03 00 00 03 00 00 00 r3 <<= 0x3
      80:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                0000000000000280:  R_BPF_64_64  .rodata
<=== r2 will be the address of .rodata with offset 0.
<=== look at the first 32 bytes of .rodata:
    0x00000000 a8020000 00000000 10030000 00000000 ................
    0x00000010 b8020000 00000000 c8020000 00000000 ................
The four actual addresses are
    0x2a8: insn idx 0x2a8/8 = 85
    0x310: insn idx 0x310/8 = 98
    0x2b8: insn idx 0x2b8/8 = 87
    0x2c8: insn idx 0x2c8/8 = 89

      82:       0f 32 00 00 00 00 00 00 r2 += r3
      83:       79 22 00 00 00 00 00 00 r2 = *(u64 *)(r2 + 0x0)
      84:       0d 02 00 00 00 00 00 00 gotox r2
<=== So eventually gotox will go to the insn idx in this section.
    ;               kern_mutated += msg->operand_64;
      85:       79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
      86:       05 00 0e 00 00 00 00 00 goto +0xe <handle_sample_msg+0xc0>
    ;               kern_mutated *= msg->operand_64;
      87:       79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
      88:       05 00 03 00 00 00 00 00 goto +0x3 <handle_sample_msg+0x78>
    ;               kern_mutated *= msg->operand_32;
      89:       61 11 08 00 00 00 00 00 w1 = *(u32 *)(r1 + 0x8)
      90:       67 01 00 00 20 00 00 00 r1 <<= 0x20
      91:       c7 01 00 00 20 00 00 00 r1 s>>= 0x20
      92:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
    ...
    00000000000003a0 <publish_next_kern_msg>:
    ; {
     116:       bc 16 00 00 00 00 00 00 w6 = w1
    ;       msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0);
     117:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                00000000000003a8:  R_BPF_64_64  kernel_ringbuf
     119:       b7 02 00 00 10 00 00 00 r2 = 0x10
     120:       b7 03 00 00 00 00 00 00 r3 = 0x0
     121:       85 00 00 00 83 00 00 00 call 0x83
    ;       if (!msg) {
     122:       55 00 06 00 00 00 00 00 if r0 != 0x0 goto +0x6 <publish_next_kern_msg+0x68>
    ;               err = 4;
     123:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                00000000000003d8:  R_BPF_64_64  err
     125:       b4 02 00 00 04 00 00 00 w2 = 0x4
     126:       63 21 00 00 00 00 00 00 *(u32 *)(r1 + 0x0) = w2
     127:       b4 00 00 00 01 00 00 00 w0 = 0x1
    ;               return 1;
     128:       05 00 31 00 00 00 00 00 goto +0x31 <publish_next_kern_msg+0x1f0>
    ;       switch (index % TEST_MSG_OP_NUM_OPS) {
     129:       54 06 00 00 03 00 00 00 w6 &= 0x3
     130:       67 06 00 00 03 00 00 00 r6 <<= 0x3
     131:       18 01 00 00 20 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x20 ll
                0000000000000418:  R_BPF_64_64  .rodata
<=== r1 will be the address of .rodata with offset 0x20.
<=== look at the 32 bytes of .rodata starting at offset 0x20:
    0x00000020 40040000 00000000 18050000 00000000 @...............
    0x00000030 88040000 00000000 d0040000 00000000 ................
The four actual addresses are
    0x440: insn idx 0x440/8 = 136
    0x518: insn idx 0x518/8 = 163
    0x488: insn idx 0x488/8 = 145
    0x4d0: insn idx 0x4d0/8 = 154
     133:       0f 61 00 00 00 00 00 00 r1 += r6
     134:       79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
     135:       0d 01 00 00 00 00 00 00 gotox r1
<=== So eventually gotox will go to the insn idx in this section.
     136:       b4 01 00 00 00 00 00 00 w1 = 0x0
    ;               msg->msg_op = TEST_MSG_OP_INC64;
     137:       63 10 00 00 00 00 00 00 *(u32 *)(r0 + 0x0) = w1
     138:       b7 01 00 00 04 00 00 00 r1 = 0x4
    ;               msg->operand_64 = operand_64;
     139:       7b 10 08 00 00 00 00 00 *(u64 *)(r0 + 0x8) = r1
    ;               expected_user_mutated += operand_64;
     140:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                0000000000000460:  R_BPF_64_64  expected_user_mutated
     142:       79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
     143:       07 01 00 00 04 00 00 00 r1 += 0x4
    ;               break;
     144:       05 00 1a 00 00 00 00 00 goto +0x1a <publish_next_kern_msg+0x1b8>
     145:       b4 01 00 00 02 00 00 00 w1 = 0x2
    ;               msg->msg_op = TEST_MSG_OP_MUL64;
    ...

There are a few things worth discussing.
First, in the above, it is hard to find the jump table size for a particular
relocation ('R_BPF_64_64  .rodata + <offset>'). One approach is to scan through
the whole ELF file and collect all '.rodata + <offset>' relocations.
For example, here we have
   .rodata + 0
   .rodata + 0x20
   .rodata + 0x40
   .rodata + 0x55
   .rodata + 0x75
   .rodata + 0x89
   .rodata + 0x95
   .rodata + 0xb3
With the above information, the size for each sub-rodata can be found easily.

Second, the current gotox insn target is the instruction offset from
the beginning of the section. This is different from goto/gotol, where the
target is relative to the current insn. Note that x86 uses an offset
relative to the %rip register, as shown below:
      1c: 48 8d 0d 00 00 00 00          leaq    (%rip), %rcx            # 0x23 <read_protocol_msg+0x23>
                000000000000001f:  R_X86_64_PC32        .rodata-0x4
      ;       switch (msg->msg_op) {
      23: 48 63 14 91                   movslq  (%rcx,%rdx,4), %rdx
      27: 48 01 ca                      addq    %rcx, %rdx
      2a: ff e2                         jmpq    *%rdx
I could explore using pc-relative offsets for the .rodata contents, but since we
do not have %rip, I am not sure how difficult that would be.
---
 llvm/lib/Target/BPF/BPFISelLowering.cpp | 31 +++++++++++++++++++++++--
 llvm/lib/Target/BPF/BPFISelLowering.h   |  2 ++
 llvm/lib/Target/BPF/BPFInstrInfo.td     | 27 +++++++++++++++++++++
 llvm/lib/Target/BPF/BPFMCInstLower.cpp  |  3 +++
 4 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 6c196309d2d1a..b1af4542714ad 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -65,10 +65,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
-  setOperationAction(ISD::BRIND, MVT::Other, Expand);
   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 
-  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);
+  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool, ISD::JumpTable,
+                      ISD::BlockAddress},
+                     MVT::i64, Custom);
 
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -312,10 +313,14 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
   case ISD::BR_CC:
     return LowerBR_CC(Op, DAG);
+  case ISD::JumpTable:
+    return LowerJumpTable(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
   case ISD::ConstantPool:
     return LowerConstantPool(Op, DAG);
+  case ISD::BlockAddress:
+    return LowerBlockAddress(Op, DAG);
   case ISD::SELECT_CC:
     return LowerSELECT_CC(Op, DAG);
   case ISD::SDIV:
@@ -726,6 +731,11 @@ SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
   return Op;
 }
 
+SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+  return getAddr(N, DAG);
+}
+
 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((BPFISD::NodeType)Opcode) {
   case BPFISD::FIRST_NUMBER:
@@ -757,6 +767,17 @@ static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                                    N->getOffset(), Flags);
 }
 
+static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
+                                   Flags);
+}
+
+static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
+}
+
 template <class NodeTy>
 SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                    unsigned Flags) const {
@@ -783,6 +804,12 @@ SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
   return getAddr(N, DAG);
 }
 
+SDValue BPFTargetLowering::LowerBlockAddress(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
+  return getAddr(N, DAG);
+}
+
 unsigned
 BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                  unsigned Reg, bool isSigned) const {
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index ad048ad05e6dd..7862c829fcb8f 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -80,6 +80,8 @@ class BPFTargetLowering : public TargetLowering {
   SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
 
   template <class NodeTy>
   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 2dcf1eae086be..cb73e35cea508 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -183,6 +183,15 @@ class TYPE_LD_ST<bits<3> mode, bits<2> size,
   let Inst{60-59} = size;
 }
 
+// For indirect jump
+class TYPE_IND_JMP<bits<4> op, bits<1> srctype,
+                   dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstBPF<outs, ins, asmstr, pattern> {
+
+  let Inst{63-60} = op;
+  let Inst{59} = srctype;
+}
+
 // jump instructions
 class JMP_RR<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
     : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
@@ -216,6 +225,18 @@ class JMP_RI<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
   let BPFClass = BPF_JMP;
 }
 
+class JMP_IND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
+    : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
+                   (outs),
+                   (ins GPR:$dst),
+                   !strconcat(OpcodeStr, " $dst"),
+                   Pattern> {
+  bits<4> dst;
+
+  let Inst{51-48} = dst;
+  let BPFClass = BPF_JMP;
+}
+
 class JMP_JCOND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
     : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
                    (outs),
@@ -281,6 +302,10 @@ defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
 defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
 defm JSET : J<BPF_JSET, "&", NoCond, NoCond>;
 def JCOND : JMP_JCOND<BPF_JCOND, "may_goto", []>;
+
+let isIndirectBranch = 1 in {
+  def JX : JMP_IND<BPF_JA, "gotox", [(brind i64:$dst)]>;
+}
 }
 
 // ALU instructions
@@ -851,6 +876,8 @@ let usesCustomInserter = 1, isCodeGenOnly = 1 in {
 // load 64-bit global addr into register
 def : Pat<(BPFWrapper tglobaladdr:$in), (LD_imm64 tglobaladdr:$in)>;
 def : Pat<(BPFWrapper tconstpool:$in), (LD_imm64 tconstpool:$in)>;
+def : Pat<(BPFWrapper tblockaddress:$in), (LD_imm64 tblockaddress:$in)>;
+def : Pat<(BPFWrapper tjumptable:$in), (LD_imm64 tjumptable:$in)>;
 
 // 0xffffFFFF doesn't fit into simm32, optimize common case
 def : Pat<(i64 (and (i64 GPR:$src), 0xffffFFFF)),
diff --git a/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
index 040a1fb750702..164d172c241c8 100644
--- a/llvm/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
@@ -77,6 +77,9 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     case MachineOperand::MO_ConstantPoolIndex:
       MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
       break;
+    case MachineOperand::MO_JumpTableIndex:
+      MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
+      break;
     }
 
     OutMI.addOperand(MCOp);



More information about the llvm-commits mailing list