[llvm] [RFC][BPF] Support Jump Table (PR #133856)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 21:48:00 PDT 2025
https://github.com/yonghong-song created https://github.com/llvm/llvm-project/pull/133856
NOTE: We probably need cpu v5 or other flags to enable this feature. We can add it later when necessary.
This patch adds jump table support. A new insn 'gotox <reg>' is added to allow goto through a register. The register represents the address in the current section. The following is a concrete example with bpf selftest progs/user_ringbuf_success.c.
Compilation command line to generate .s file:
=============================================
```
clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
-I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
-I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
-I/home/yhs/work/bpf-next/tools/include/uapi \
-I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -std=gnu11 \
-fno-strict-aliasing -Wno-compare-distinct-pointer-types \
-idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
-idirafter /usr/local/include -idirafter /usr/include \
-DENABLE_ATOMICS_TESTS -O2 -S progs/user_ringbuf_success.c \
-o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o.s \
--target=bpf -mcpu=v3
```
The related assembly:
```
read_protocol_msg:
...
r3 <<= 3
r1 = .LJTI1_0 ll
r1 += r3
r1 = *(u64 *)(r1 + 0)
gotox r1
LBB1_4:
r1 = *(u64 *)(r0 + 8)
goto LBB1_5
LBB1_7:
r1 = *(u64 *)(r0 + 8)
goto LBB1_8
LBB1_9:
w1 = *(u32 *)(r0 + 8)
r1 <<= 32
r1 s>>= 32
r2 = kern_mutated ll
r3 = *(u64 *)(r2 + 0)
r3 *= r1
*(u64 *)(r2 + 0) = r3
goto LBB1_11
LBB1_6:
w1 = *(u32 *)(r0 + 8)
r1 <<= 32
r1 s>>= 32
LBB1_5:
...
.section .rodata,"a",@progbits
.p2align 3, 0x0
.LJTI1_0:
.quad LBB1_4
.quad LBB1_6
.quad LBB1_7
.quad LBB1_9
...
publish_next_kern_msg:
...
r6 <<= 3
r1 = .LJTI6_0 ll
r1 += r6
r1 = *(u64 *)(r1 + 0)
gotox r1
LBB6_3:
...
LBB6_5:
...
LBB6_6:
...
LBB6_4:
...
.section .rodata,"a",@progbits
.p2align 3, 0x0
.LJTI6_0:
.quad LBB6_3
.quad LBB6_4
.quad LBB6_5
.quad LBB6_6
```
Now let us look at .o file
==========================
```
clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
-I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
-I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
-I/home/yhs/work/bpf-next/tools/include/uapi \
-I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include \
-std=gnu11 -fno-strict-aliasing -Wno-compare-distinct-pointer-types \
-idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
-idirafter /usr/local/include -idirafter /usr/include -DENABLE_ATOMICS_TESTS \
-O2 -c progs/user_ringbuf_success.c \
-o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o \
--target=bpf -mcpu=v3
```
In obj file, all .rodata sections are merged together. So we have
```
$ llvm-readelf -x '.rodata' user_ringbuf_success.bpf.o
Hex dump of section '.rodata':
0x00000000 a8020000 00000000 10030000 00000000 ................
0x00000010 b8020000 00000000 c8020000 00000000 ................
0x00000020 40040000 00000000 18050000 00000000 @...............
0x00000030 88040000 00000000 d0040000 00000000 ................
0x00000040 44726169 6e207265 7475726e 65643a20 Drain returned:
0x00000050 256c640a 00556e65 78706563 7465646c %ld..Unexpectedl
0x00000060 79206661 696c6564 20746f20 67657420 y failed to get
0x00000070 6d73670a 00556e72 65636f67 6e697a65 msg..Unrecognize
0x00000080 64206f70 2025640a 00256c75 20213d20 d op %d..%lu !=
0x00000090 256c750a 00627066 5f64796e 7074725f %lu..bpf_dynptr_
0x000000a0 72656164 28292066 61696c65 643a2025 read() failed: %
0x000000b0 640a0055 6e657870 65637465 646c7920 d..Unexpectedly
0x000000c0 6661696c 65642074 6f206765 74207361 failed to get sa
0x000000d0 6d706c65 0a00 mple..
```
Let us look at the insns. Some annotation explains details.
```
$ llvm-objdump -Sr user_ringbuf_success.bpf.o
....
Disassembly of section .text:
0000000000000000 <read_protocol_msg>:
; msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg));
0: b4 02 00 00 00 00 00 00 w2 = 0x0
1: b4 03 00 00 10 00 00 00 w3 = 0x10
2: 85 00 00 00 cb 00 00 00 call 0xcb
...
0000000000000268 <handle_sample_msg>:
; switch (msg->msg_op) {
77: 61 13 00 00 00 00 00 00 w3 = *(u32 *)(r1 + 0x0)
78: 26 03 1c 00 03 00 00 00 if w3 > 0x3 goto +0x1c <handle_sample_msg+0xf0>
79: 67 03 00 00 03 00 00 00 r3 <<= 0x3
80: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
0000000000000280: R_BPF_64_64 .rodata
<=== r2 will be the address of .rodata with offset 0.
<=== look at the first 32 bytes of .rodata:
0x00000000 a8020000 00000000 10030000 00000000 ................
0x00000010 b8020000 00000000 c8020000 00000000 ................
The four actual addresses are
0x2a8: insn idx 0x2a8/8 = 85
0x310: insn idx 0x310/8 = 98
0x2b8: insn idx 0x2b8/8 = 87
0x2c8: insn idx 0x2c8/8 = 89
82: 0f 32 00 00 00 00 00 00 r2 += r3
83: 79 22 00 00 00 00 00 00 r2 = *(u64 *)(r2 + 0x0)
84: 0d 02 00 00 00 00 00 00 gotox r2
<=== So eventually gotox will go to the insn idx in this section.
; kern_mutated += msg->operand_64;
85: 79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
86: 05 00 0e 00 00 00 00 00 goto +0xe <handle_sample_msg+0xc0>
; kern_mutated *= msg->operand_64;
87: 79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
88: 05 00 03 00 00 00 00 00 goto +0x3 <handle_sample_msg+0x78>
; kern_mutated *= msg->operand_32;
89: 61 11 08 00 00 00 00 00 w1 = *(u32 *)(r1 + 0x8)
90: 67 01 00 00 20 00 00 00 r1 <<= 0x20
91: c7 01 00 00 20 00 00 00 r1 s>>= 0x20
92: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
...
00000000000003a0 <publish_next_kern_msg>:
; {
116: bc 16 00 00 00 00 00 00 w6 = w1
; msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0);
117: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
00000000000003a8: R_BPF_64_64 kernel_ringbuf
119: b7 02 00 00 10 00 00 00 r2 = 0x10
120: b7 03 00 00 00 00 00 00 r3 = 0x0
121: 85 00 00 00 83 00 00 00 call 0x83
; if (!msg) {
122: 55 00 06 00 00 00 00 00 if r0 != 0x0 goto +0x6 <publish_next_kern_msg+0x68>
; err = 4;
123: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
00000000000003d8: R_BPF_64_64 err
125: b4 02 00 00 04 00 00 00 w2 = 0x4
126: 63 21 00 00 00 00 00 00 *(u32 *)(r1 + 0x0) = w2
127: b4 00 00 00 01 00 00 00 w0 = 0x1
; return 1;
128: 05 00 31 00 00 00 00 00 goto +0x31 <publish_next_kern_msg+0x1f0>
; switch (index % TEST_MSG_OP_NUM_OPS) {
129: 54 06 00 00 03 00 00 00 w6 &= 0x3
130: 67 06 00 00 03 00 00 00 r6 <<= 0x3
131: 18 01 00 00 20 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x20 ll
0000000000000418: R_BPF_64_64 .rodata
<=== r1 will be the address of .rodata with offset 0x20.
<=== look at the second 32 bytes of .rodata (offsets 0x20-0x3f):
0x00000020 40040000 00000000 18050000 00000000 @...............
0x00000030 88040000 00000000 d0040000 00000000 ................
The four actual addresses are
0x440: insn idx 0x440/8 = 136
0x518: insn idx 0x518/8 = 163
0x488: insn idx 0x488/8 = 145
0x4d0: insn idx 0x4d0/8 = 154
133: 0f 61 00 00 00 00 00 00 r1 += r6
134: 79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
135: 0d 01 00 00 00 00 00 00 gotox r1
<=== So eventually gotox will go to the insn idx in this section.
136: b4 01 00 00 00 00 00 00 w1 = 0x0
; msg->msg_op = TEST_MSG_OP_INC64;
137: 63 10 00 00 00 00 00 00 *(u32 *)(r0 + 0x0) = w1
138: b7 01 00 00 04 00 00 00 r1 = 0x4
; msg->operand_64 = operand_64;
139: 7b 10 08 00 00 00 00 00 *(u64 *)(r0 + 0x8) = r1
; expected_user_mutated += operand_64;
140: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
0000000000000460: R_BPF_64_64 expected_user_mutated
142: 79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
143: 07 01 00 00 04 00 00 00 r1 += 0x4
; break;
144: 05 00 1a 00 00 00 00 00 goto +0x1a <publish_next_kern_msg+0x1b8>
145: b4 01 00 00 02 00 00 00 w1 = 0x2
; msg->msg_op = TEST_MSG_OP_MUL64;
...
```
There are a few things worth discussing.
First, in the above, it is hard to find the jump table size for a particular relocation ('R_BPF_64_64 .rodata + <offset>'). One approach is to scan through the whole ELF file and collect all '.rodata + <offset>' relocations. For example, here we have
```
.rodata + 0
.rodata + 0x20
.rodata + 0x40
.rodata + 0x55
.rodata + 0x75
.rodata + 0x89
.rodata + 0x95
.rodata + 0xb3
```
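With the above information, the size of each sub-rodata can be found easily: it is the distance from its offset to the next relocation offset (or to the end of .rodata for the last one). For example, the jump table at '.rodata + 0' is 0x20 - 0 = 32 bytes, i.e. four 8-byte entries.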
Second, the current gotox insn target is the instruction offset from the beginning of the section. This is different from goto/gotol, where the target is relative to the current insn. Note that x86 uses a relative offset with the %rip register, like below:
```
1c: 48 8d 0d 00 00 00 00 leaq (%rip), %rcx # 0x23 <read_protocol_msg+0x23>
000000000000001f: R_X86_64_PC32 .rodata-0x4
; switch (msg->msg_op) {
23: 48 63 14 91 movslq (%rcx,%rdx,4), %rdx
27: 48 01 ca addq %rcx, %rdx
2a: ff e2 jmpq *%rdx
```
I could explore using pc-relative offsets for the .rodata contents, but since we
do not have %rip, I am not sure how difficult it would be.
From 6aa1350936feb6ab67ed11b2bfb28003b4ddfa97 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Mon, 31 Mar 2025 21:25:26 -0700
Subject: [PATCH] [RFC][BPF] Support Jump Table
NOTE: We probably need cpu v5 or other flags to enable this feature.
We can add it later when necessary.
This patch adds jump table support. A new insn 'gotox <reg>' is
added to allow goto through a register. The register represents
the address in the current section. The following is a concrete
example with bpf selftest progs/user_ringbuf_success.c.
Compilation command line to generate .s file:
=============================================
clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
-I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
-I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
-I/home/yhs/work/bpf-next/tools/include/uapi \
-I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -std=gnu11 \
-fno-strict-aliasing -Wno-compare-distinct-pointer-types \
-idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
-idirafter /usr/local/include -idirafter /usr/include \
-DENABLE_ATOMICS_TESTS -O2 -S progs/user_ringbuf_success.c \
-o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o.s \
--target=bpf -mcpu=v3
The related assembly:
read_protocol_msg:
...
r3 <<= 3
r1 = .LJTI1_0 ll
r1 += r3
r1 = *(u64 *)(r1 + 0)
gotox r1
LBB1_4:
r1 = *(u64 *)(r0 + 8)
goto LBB1_5
LBB1_7:
r1 = *(u64 *)(r0 + 8)
goto LBB1_8
LBB1_9:
w1 = *(u32 *)(r0 + 8)
r1 <<= 32
r1 s>>= 32
r2 = kern_mutated ll
r3 = *(u64 *)(r2 + 0)
r3 *= r1
*(u64 *)(r2 + 0) = r3
goto LBB1_11
LBB1_6:
w1 = *(u32 *)(r0 + 8)
r1 <<= 32
r1 s>>= 32
LBB1_5:
...
.section .rodata,"a",@progbits
.p2align 3, 0x0
.LJTI1_0:
.quad LBB1_4
.quad LBB1_6
.quad LBB1_7
.quad LBB1_9
...
publish_next_kern_msg:
...
r6 <<= 3
r1 = .LJTI6_0 ll
r1 += r6
r1 = *(u64 *)(r1 + 0)
gotox r1
LBB6_3:
...
LBB6_5:
...
LBB6_6:
...
LBB6_4:
...
.section .rodata,"a",@progbits
.p2align 3, 0x0
.LJTI6_0:
.quad LBB6_3
.quad LBB6_4
.quad LBB6_5
.quad LBB6_6
Now let us look at .o file
==========================
clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
-I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
-I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
-I/home/yhs/work/bpf-next/tools/include/uapi \
-I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include \
-std=gnu11 -fno-strict-aliasing -Wno-compare-distinct-pointer-types \
-idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
-idirafter /usr/local/include -idirafter /usr/include -DENABLE_ATOMICS_TESTS \
-O2 -c progs/user_ringbuf_success.c \
-o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o \
--target=bpf -mcpu=v3
In obj file, all .rodata sections are merged together. So we have
$ llvm-readelf -x '.rodata' user_ringbuf_success.bpf.o
Hex dump of section '.rodata':
0x00000000 a8020000 00000000 10030000 00000000 ................
0x00000010 b8020000 00000000 c8020000 00000000 ................
0x00000020 40040000 00000000 18050000 00000000 @...............
0x00000030 88040000 00000000 d0040000 00000000 ................
0x00000040 44726169 6e207265 7475726e 65643a20 Drain returned:
0x00000050 256c640a 00556e65 78706563 7465646c %ld..Unexpectedl
0x00000060 79206661 696c6564 20746f20 67657420 y failed to get
0x00000070 6d73670a 00556e72 65636f67 6e697a65 msg..Unrecognize
0x00000080 64206f70 2025640a 00256c75 20213d20 d op %d..%lu !=
0x00000090 256c750a 00627066 5f64796e 7074725f %lu..bpf_dynptr_
0x000000a0 72656164 28292066 61696c65 643a2025 read() failed: %
0x000000b0 640a0055 6e657870 65637465 646c7920 d..Unexpectedly
0x000000c0 6661696c 65642074 6f206765 74207361 failed to get sa
0x000000d0 6d706c65 0a00 mple..
Let us look at the insns. Some annotation explains details.
$ llvm-objdump -Sr user_ringbuf_success.bpf.o
....
Disassembly of section .text:
0000000000000000 <read_protocol_msg>:
; msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg));
0: b4 02 00 00 00 00 00 00 w2 = 0x0
1: b4 03 00 00 10 00 00 00 w3 = 0x10
2: 85 00 00 00 cb 00 00 00 call 0xcb
...
0000000000000268 <handle_sample_msg>:
; switch (msg->msg_op) {
77: 61 13 00 00 00 00 00 00 w3 = *(u32 *)(r1 + 0x0)
78: 26 03 1c 00 03 00 00 00 if w3 > 0x3 goto +0x1c <handle_sample_msg+0xf0>
79: 67 03 00 00 03 00 00 00 r3 <<= 0x3
80: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
0000000000000280: R_BPF_64_64 .rodata
<=== r2 will be the address of .rodata with offset 0.
<=== look at the first 32 bytes of .rodata:
0x00000000 a8020000 00000000 10030000 00000000 ................
0x00000010 b8020000 00000000 c8020000 00000000 ................
The four actual addresses are
0x2a8: insn idx 0x2a8/8 = 85
0x310: insn idx 0x310/8 = 98
0x2b8: insn idx 0x2b8/8 = 87
0x2c8: insn idx 0x2c8/8 = 89
82: 0f 32 00 00 00 00 00 00 r2 += r3
83: 79 22 00 00 00 00 00 00 r2 = *(u64 *)(r2 + 0x0)
84: 0d 02 00 00 00 00 00 00 gotox r2
<=== So eventually gotox will go to the insn idx in this section.
; kern_mutated += msg->operand_64;
85: 79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
86: 05 00 0e 00 00 00 00 00 goto +0xe <handle_sample_msg+0xc0>
; kern_mutated *= msg->operand_64;
87: 79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
88: 05 00 03 00 00 00 00 00 goto +0x3 <handle_sample_msg+0x78>
; kern_mutated *= msg->operand_32;
89: 61 11 08 00 00 00 00 00 w1 = *(u32 *)(r1 + 0x8)
90: 67 01 00 00 20 00 00 00 r1 <<= 0x20
91: c7 01 00 00 20 00 00 00 r1 s>>= 0x20
92: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
...
00000000000003a0 <publish_next_kern_msg>:
; {
116: bc 16 00 00 00 00 00 00 w6 = w1
; msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0);
117: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
00000000000003a8: R_BPF_64_64 kernel_ringbuf
119: b7 02 00 00 10 00 00 00 r2 = 0x10
120: b7 03 00 00 00 00 00 00 r3 = 0x0
121: 85 00 00 00 83 00 00 00 call 0x83
; if (!msg) {
122: 55 00 06 00 00 00 00 00 if r0 != 0x0 goto +0x6 <publish_next_kern_msg+0x68>
; err = 4;
123: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
00000000000003d8: R_BPF_64_64 err
125: b4 02 00 00 04 00 00 00 w2 = 0x4
126: 63 21 00 00 00 00 00 00 *(u32 *)(r1 + 0x0) = w2
127: b4 00 00 00 01 00 00 00 w0 = 0x1
; return 1;
128: 05 00 31 00 00 00 00 00 goto +0x31 <publish_next_kern_msg+0x1f0>
; switch (index % TEST_MSG_OP_NUM_OPS) {
129: 54 06 00 00 03 00 00 00 w6 &= 0x3
130: 67 06 00 00 03 00 00 00 r6 <<= 0x3
131: 18 01 00 00 20 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x20 ll
0000000000000418: R_BPF_64_64 .rodata
<=== r1 will be the address of .rodata with offset 0x20.
<=== look at the second 32 bytes of .rodata (offsets 0x20-0x3f):
0x00000020 40040000 00000000 18050000 00000000 @...............
0x00000030 88040000 00000000 d0040000 00000000 ................
The four actual addresses are
0x440: insn idx 0x440/8 = 136
0x518: insn idx 0x518/8 = 163
0x488: insn idx 0x488/8 = 145
0x4d0: insn idx 0x4d0/8 = 154
133: 0f 61 00 00 00 00 00 00 r1 += r6
134: 79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
135: 0d 01 00 00 00 00 00 00 gotox r1
<=== So eventually gotox will go to the insn idx in this section.
136: b4 01 00 00 00 00 00 00 w1 = 0x0
; msg->msg_op = TEST_MSG_OP_INC64;
137: 63 10 00 00 00 00 00 00 *(u32 *)(r0 + 0x0) = w1
138: b7 01 00 00 04 00 00 00 r1 = 0x4
; msg->operand_64 = operand_64;
139: 7b 10 08 00 00 00 00 00 *(u64 *)(r0 + 0x8) = r1
; expected_user_mutated += operand_64;
140: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
0000000000000460: R_BPF_64_64 expected_user_mutated
142: 79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
143: 07 01 00 00 04 00 00 00 r1 += 0x4
; break;
144: 05 00 1a 00 00 00 00 00 goto +0x1a <publish_next_kern_msg+0x1b8>
145: b4 01 00 00 02 00 00 00 w1 = 0x2
; msg->msg_op = TEST_MSG_OP_MUL64;
...
There are a few things worth discussing.
First, in the above, it is hard to find the jump table size for a particular
relocation ('R_BPF_64_64 .rodata + <offset>'). One approach is to scan through
the whole ELF file and collect all '.rodata + <offset>' relocations.
For example, here we have
.rodata + 0
.rodata + 0x20
.rodata + 0x40
.rodata + 0x55
.rodata + 0x75
.rodata + 0x89
.rodata + 0x95
.rodata + 0xb3
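With the above information, the size of each sub-rodata can be found easily:
it is the distance from its offset to the next relocation offset (or to the
end of .rodata for the last one). For example, the jump table at
'.rodata + 0' is 0x20 - 0 = 32 bytes, i.e. four 8-byte entries.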
Second, the current gotox insn target is the instruction offset from
the beginning of the section. This is different from goto/gotol, where the
target is relative to the current insn. Note that x86 uses a relative
offset with the %rip register, like below:
1c: 48 8d 0d 00 00 00 00 leaq (%rip), %rcx # 0x23 <read_protocol_msg+0x23>
000000000000001f: R_X86_64_PC32 .rodata-0x4
; switch (msg->msg_op) {
23: 48 63 14 91 movslq (%rcx,%rdx,4), %rdx
27: 48 01 ca addq %rcx, %rdx
2a: ff e2 jmpq *%rdx
I could explore using pc-relative offsets for the .rodata contents, but since we
do not have %rip, I am not sure how difficult it would be.
---
llvm/lib/Target/BPF/BPFISelLowering.cpp | 31 +++++++++++++++++++++++--
llvm/lib/Target/BPF/BPFISelLowering.h | 2 ++
llvm/lib/Target/BPF/BPFInstrInfo.td | 27 +++++++++++++++++++++
llvm/lib/Target/BPF/BPFMCInstLower.cpp | 3 +++
4 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 6c196309d2d1a..b1af4542714ad 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -65,10 +65,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);
+ setOperationAction({ISD::GlobalAddress, ISD::ConstantPool, ISD::JumpTable,
+ ISD::BlockAddress},
+ MVT::i64, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -312,10 +313,14 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress:
+ return LowerBlockAddress(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::SDIV:
@@ -726,6 +731,11 @@ SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
return Op;
}
+SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+ return getAddr(N, DAG);
+}
+
const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((BPFISD::NodeType)Opcode) {
case BPFISD::FIRST_NUMBER:
@@ -757,6 +767,17 @@ static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
N->getOffset(), Flags);
}
+static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) {
+ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
+ Flags);
+}
+
+static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) {
+ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
+}
+
template <class NodeTy>
SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
@@ -783,6 +804,12 @@ SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
return getAddr(N, DAG);
}
+SDValue BPFTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
+ return getAddr(N, DAG);
+}
+
unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
unsigned Reg, bool isSigned) const {
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index ad048ad05e6dd..7862c829fcb8f 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -80,6 +80,8 @@ class BPFTargetLowering : public TargetLowering {
SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
template <class NodeTy>
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 2dcf1eae086be..cb73e35cea508 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -183,6 +183,15 @@ class TYPE_LD_ST<bits<3> mode, bits<2> size,
let Inst{60-59} = size;
}
+// For indirect jump
+class TYPE_IND_JMP<bits<4> op, bits<1> srctype,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstBPF<outs, ins, asmstr, pattern> {
+
+ let Inst{63-60} = op;
+ let Inst{59} = srctype;
+}
+
// jump instructions
class JMP_RR<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
: TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
@@ -216,6 +225,18 @@ class JMP_RI<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
let BPFClass = BPF_JMP;
}
+class JMP_IND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
+ : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
+ (outs),
+ (ins GPR:$dst),
+ !strconcat(OpcodeStr, " $dst"),
+ Pattern> {
+ bits<4> dst;
+
+ let Inst{51-48} = dst;
+ let BPFClass = BPF_JMP;
+}
+
class JMP_JCOND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
(outs),
@@ -281,6 +302,10 @@ defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
defm JSET : J<BPF_JSET, "&", NoCond, NoCond>;
def JCOND : JMP_JCOND<BPF_JCOND, "may_goto", []>;
+
+let isIndirectBranch = 1 in {
+ def JX : JMP_IND<BPF_JA, "gotox", [(brind i64:$dst)]>;
+}
}
// ALU instructions
@@ -851,6 +876,8 @@ let usesCustomInserter = 1, isCodeGenOnly = 1 in {
// load 64-bit global addr into register
def : Pat<(BPFWrapper tglobaladdr:$in), (LD_imm64 tglobaladdr:$in)>;
def : Pat<(BPFWrapper tconstpool:$in), (LD_imm64 tconstpool:$in)>;
+def : Pat<(BPFWrapper tblockaddress:$in), (LD_imm64 tblockaddress:$in)>;
+def : Pat<(BPFWrapper tjumptable:$in), (LD_imm64 tjumptable:$in)>;
// 0xffffFFFF doesn't fit into simm32, optimize common case
def : Pat<(i64 (and (i64 GPR:$src), 0xffffFFFF)),
diff --git a/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
index 040a1fb750702..164d172c241c8 100644
--- a/llvm/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
@@ -77,6 +77,9 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_ConstantPoolIndex:
MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
+ break;
}
OutMI.addOperand(MCOp);
More information about the llvm-commits
mailing list