[llvm] [RFC][BPF] Support Jump Table (PR #133856)

Fri Apr 4 08:13:31 PDT 2025

https://github.com/yonghong-song updated https://github.com/llvm/llvm-project/pull/133856

>From 7301fc8ba28a9df69695029616a1fdbfa3054824 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Mon, 31 Mar 2025 21:25:26 -0700
Subject: [PATCH 1/3] [RFC][BPF] Support Jump Table

NOTE: We probably need cpu v5 or other flags to enable this feature.
We can add it later when necessary.

This patch adds jump table support. A new insn 'gotox <reg>' is
added to allow goto through a register. The register represents
the address in the current section. The following is a concrete
example with bpf selftest progs/user_ringbuf_success.c.

Compilation command line to generate .s file:
=============================================
clang  -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
    -I/home/yhs/work/bpf-next/tools/include/uapi \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -std=gnu11 \
    -fno-strict-aliasing -Wno-compare-distinct-pointer-types \
    -idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
    -idirafter /usr/local/include -idirafter /usr/include \
    -DENABLE_ATOMICS_TESTS   -O2 -S progs/user_ringbuf_success.c \
    -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o.s \
    --target=bpf -mcpu=v3

The related assembly:
  read_protocol_msg:
        ...
        r3 <<= 3
        r1 = .LJTI1_0 ll
        r1 += r3
        r1 = *(u64 *)(r1 + 0)
        gotox r1
  LBB1_4:
        r1 = *(u64 *)(r0 + 8)
        goto LBB1_5
  LBB1_7:
        r1 = *(u64 *)(r0 + 8)
        goto LBB1_8
  LBB1_9:
        w1 = *(u32 *)(r0 + 8)
        r1 <<= 32
        r1 s>>= 32
        r2 = kern_mutated ll
        r3 = *(u64 *)(r2 + 0)
        r3 *= r1
        *(u64 *)(r2 + 0) = r3
        goto LBB1_11
  LBB1_6:
        w1 = *(u32 *)(r0 + 8)
        r1 <<= 32
        r1 s>>= 32
  LBB1_5:
  ...
        .section        .rodata,"a",@progbits
        .p2align        3, 0x0
  .LJTI1_0:
        .quad   LBB1_4
        .quad   LBB1_6
        .quad   LBB1_7
        .quad   LBB1_9
  ...
  publish_next_kern_msg:
        ...
        r6 <<= 3
        r1 = .LJTI6_0 ll
        r1 += r6
        r1 = *(u64 *)(r1 + 0)
        gotox r1
  LBB6_3:
        ...
  LBB6_5:
        ...
  LBB6_6:
        ...
  LBB6_4:
        ...
        .section        .rodata,"a",@progbits
        .p2align        3, 0x0
.LJTI6_0:
        .quad   LBB6_3
        .quad   LBB6_4
        .quad   LBB6_5
        .quad   LBB6_6

Now let us look at .o file
==========================
clang  -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \
    -I/home/yhs/work/bpf-next/tools/include/uapi \
    -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include \
    -std=gnu11 -fno-strict-aliasing -Wno-compare-distinct-pointer-types \
    -idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \
    -idirafter /usr/local/include -idirafter /usr/include -DENABLE_ATOMICS_TESTS \
    -O2 -c progs/user_ringbuf_success.c \
    -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o \
    --target=bpf -mcpu=v3

In obj file, all .rodata sections are merged together. So we have
    $ llvm-readelf -x '.rodata' user_ringbuf_success.bpf.o
    Hex dump of section '.rodata':
    0x00000000 a8020000 00000000 10030000 00000000 ................
    0x00000010 b8020000 00000000 c8020000 00000000 ................
    0x00000020 40040000 00000000 18050000 00000000 @...............
    0x00000030 88040000 00000000 d0040000 00000000 ................
    0x00000040 44726169 6e207265 7475726e 65643a20 Drain returned:
    0x00000050 256c640a 00556e65 78706563 7465646c %ld..Unexpectedl
    0x00000060 79206661 696c6564 20746f20 67657420 y failed to get
    0x00000070 6d73670a 00556e72 65636f67 6e697a65 msg..Unrecognize
    0x00000080 64206f70 2025640a 00256c75 20213d20 d op %d..%lu !=
    0x00000090 256c750a 00627066 5f64796e 7074725f %lu..bpf_dynptr_
    0x000000a0 72656164 28292066 61696c65 643a2025 read() failed: %
    0x000000b0 640a0055 6e657870 65637465 646c7920 d..Unexpectedly
    0x000000c0 6661696c 65642074 6f206765 74207361 failed to get sa
    0x000000d0 6d706c65 0a00                       mple..

Let us look at the insns. Some annotations explain the details.
    $ llvm-objdump -Sr user_ringbuf_success.bpf.o
    ....
    Disassembly of section .text:
    0000000000000000 <read_protocol_msg>:
    ;       msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg));
       0:       b4 02 00 00 00 00 00 00 w2 = 0x0
       1:       b4 03 00 00 10 00 00 00 w3 = 0x10
       2:       85 00 00 00 cb 00 00 00 call 0xcb
    ...
    0000000000000268 <handle_sample_msg>:
    ;       switch (msg->msg_op) {
      77:       61 13 00 00 00 00 00 00 w3 = *(u32 *)(r1 + 0x0)
      78:       26 03 1c 00 03 00 00 00 if w3 > 0x3 goto +0x1c <handle_sample_msg+0xf0>
      79:       67 03 00 00 03 00 00 00 r3 <<= 0x3
      80:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                0000000000000280:  R_BPF_64_64  .rodata
<=== r2 will be the address of .rodata with offset 0.
<=== look at the first 32 bytes of .rodata:
    0x00000000 a8020000 00000000 10030000 00000000 ................
    0x00000010 b8020000 00000000 c8020000 00000000 ................
The four actual addresses are
    0x2a8: insn idx 0x2a8/8 = 85
    0x310: insn idx 0x310/8 = 98
    0x2b8: insn idx 0x2b8/8 = 87
    0x2c8: insn idx 0x2c8/8 = 89

      82:       0f 32 00 00 00 00 00 00 r2 += r3
      83:       79 22 00 00 00 00 00 00 r2 = *(u64 *)(r2 + 0x0)
      84:       0d 02 00 00 00 00 00 00 gotox r2
<=== So eventually gotox will go to the insn idx in this section.
    ;               kern_mutated += msg->operand_64;
      85:       79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
      86:       05 00 0e 00 00 00 00 00 goto +0xe <handle_sample_msg+0xc0>
    ;               kern_mutated *= msg->operand_64;
      87:       79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8)
      88:       05 00 03 00 00 00 00 00 goto +0x3 <handle_sample_msg+0x78>
    ;               kern_mutated *= msg->operand_32;
      89:       61 11 08 00 00 00 00 00 w1 = *(u32 *)(r1 + 0x8)
      90:       67 01 00 00 20 00 00 00 r1 <<= 0x20
      91:       c7 01 00 00 20 00 00 00 r1 s>>= 0x20
      92:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
    ...
    00000000000003a0 <publish_next_kern_msg>:
    ; {
     116:       bc 16 00 00 00 00 00 00 w6 = w1
    ;       msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0);
     117:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                00000000000003a8:  R_BPF_64_64  kernel_ringbuf
     119:       b7 02 00 00 10 00 00 00 r2 = 0x10
     120:       b7 03 00 00 00 00 00 00 r3 = 0x0
     121:       85 00 00 00 83 00 00 00 call 0x83
    ;       if (!msg) {
     122:       55 00 06 00 00 00 00 00 if r0 != 0x0 goto +0x6 <publish_next_kern_msg+0x68>
    ;               err = 4;
     123:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                00000000000003d8:  R_BPF_64_64  err
     125:       b4 02 00 00 04 00 00 00 w2 = 0x4
     126:       63 21 00 00 00 00 00 00 *(u32 *)(r1 + 0x0) = w2
     127:       b4 00 00 00 01 00 00 00 w0 = 0x1
    ;               return 1;
     128:       05 00 31 00 00 00 00 00 goto +0x31 <publish_next_kern_msg+0x1f0>
    ;       switch (index % TEST_MSG_OP_NUM_OPS) {
     129:       54 06 00 00 03 00 00 00 w6 &= 0x3
     130:       67 06 00 00 03 00 00 00 r6 <<= 0x3
     131:       18 01 00 00 20 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x20 ll
                0000000000000418:  R_BPF_64_64  .rodata
<=== r1 will be the address of .rodata with offset 0x20.
<=== look at the 32 bytes of .rodata starting at offset 0x20:
    0x00000020 40040000 00000000 18050000 00000000 @...............
    0x00000030 88040000 00000000 d0040000 00000000 ................
The four actual addresses are
    0x440: insn idx 0x440/8 = 136
    0x518: insn idx 0x518/8 = 163
    0x488: insn idx 0x488/8 = 145
    0x4d0: insn idx 0x4d0/8 = 154
     133:       0f 61 00 00 00 00 00 00 r1 += r6
     134:       79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
     135:       0d 01 00 00 00 00 00 00 gotox r1
<=== So eventually gotox will go to the insn idx in this section.
     136:       b4 01 00 00 00 00 00 00 w1 = 0x0
    ;               msg->msg_op = TEST_MSG_OP_INC64;
     137:       63 10 00 00 00 00 00 00 *(u32 *)(r0 + 0x0) = w1
     138:       b7 01 00 00 04 00 00 00 r1 = 0x4
    ;               msg->operand_64 = operand_64;
     139:       7b 10 08 00 00 00 00 00 *(u64 *)(r0 + 0x8) = r1
    ;               expected_user_mutated += operand_64;
     140:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
                0000000000000460:  R_BPF_64_64  expected_user_mutated
     142:       79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0)
     143:       07 01 00 00 04 00 00 00 r1 += 0x4
    ;               break;
     144:       05 00 1a 00 00 00 00 00 goto +0x1a <publish_next_kern_msg+0x1b8>
     145:       b4 01 00 00 02 00 00 00 w1 = 0x2
    ;               msg->msg_op = TEST_MSG_OP_MUL64;
    ...

There are a few things worth discussing.
First, in the above, it is hard to find the jump table size for a particular
relocation ('R_BPF_64_64  .rodata + <offset>'). One option is to scan through
the whole ELF file to find all '.rodata + <offset>' relocations.
For example, here we have
   .rodata + 0
   .rodata + 0x20
   .rodata + 0x40
   .rodata + 0x55
   .rodata + 0x75
   .rodata + 0x89
   .rodata + 0x95
   .rodata + 0xb3
With the above information, the size for each sub-rodata can be found easily.

An option -bpf-min-jump-table-entries is implemented to control the minimum
number of entries to use a jump table on BPF. The default value is 4, but it
can be changed with the following clang option
  clang ... -mllvm -bpf-min-jump-table-entries=6
where the number of jump table cases needs to be >= 6 in order to
use a jump table.
---
 llvm/lib/Target/BPF/BPFISelLowering.cpp | 36 +++++++++++++++++++++++--
 llvm/lib/Target/BPF/BPFISelLowering.h   |  2 ++
 llvm/lib/Target/BPF/BPFInstrInfo.td     | 27 +++++++++++++++++++
 llvm/lib/Target/BPF/BPFMCInstLower.cpp  |  3 +++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 6c196309d2d1a..cff66ed628140 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -36,6 +36,10 @@ static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
   cl::Hidden, cl::init(false),
   cl::desc("Expand memcpy into load/store pairs in order"));
 
+static cl::opt<unsigned> BPFMinimumJumpTableEntries(
+    "bpf-min-jump-table-entries", cl::init(4), cl::Hidden,
+    cl::desc("Set minimum number of entries to use a jump table on BPF"));
+
 static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                  SDValue Val = {}) {
   std::string Str;
@@ -65,10 +69,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
-  setOperationAction(ISD::BRIND, MVT::Other, Expand);
   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 
-  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);
+  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool, ISD::JumpTable,
+                      ISD::BlockAddress},
+                     MVT::i64, Custom);
 
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -155,6 +160,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
   setBooleanContents(ZeroOrOneBooleanContent);
   setMaxAtomicSizeInBitsSupported(64);
+  setMinimumJumpTableEntries(BPFMinimumJumpTableEntries);
 
   // Function alignments
   setMinFunctionAlignment(Align(8));
@@ -312,10 +318,14 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
   case ISD::BR_CC:
     return LowerBR_CC(Op, DAG);
+  case ISD::JumpTable:
+    return LowerJumpTable(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
   case ISD::ConstantPool:
     return LowerConstantPool(Op, DAG);
+  case ISD::BlockAddress:
+    return LowerBlockAddress(Op, DAG);
   case ISD::SELECT_CC:
     return LowerSELECT_CC(Op, DAG);
   case ISD::SDIV:
@@ -726,6 +736,11 @@ SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
   return Op;
 }
 
+SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+  return getAddr(N, DAG);
+}
+
 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((BPFISD::NodeType)Opcode) {
   case BPFISD::FIRST_NUMBER:
@@ -757,6 +772,17 @@ static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                                    N->getOffset(), Flags);
 }
 
+static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
+                                   Flags);
+}
+
+static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
+}
+
 template <class NodeTy>
 SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                    unsigned Flags) const {
@@ -783,6 +809,12 @@ SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
   return getAddr(N, DAG);
 }
 
+SDValue BPFTargetLowering::LowerBlockAddress(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
+  return getAddr(N, DAG);
+}
+
 unsigned
 BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                  unsigned Reg, bool isSigned) const {
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index ad048ad05e6dd..7862c829fcb8f 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -80,6 +80,8 @@ class BPFTargetLowering : public TargetLowering {
   SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
 
   template <class NodeTy>
   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 2dcf1eae086be..cb73e35cea508 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -183,6 +183,15 @@ class TYPE_LD_ST<bits<3> mode, bits<2> size,
   let Inst{60-59} = size;
 }
 
+// For indirect jump
+class TYPE_IND_JMP<bits<4> op, bits<1> srctype,
+                   dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstBPF<outs, ins, asmstr, pattern> {
+
+  let Inst{63-60} = op;
+  let Inst{59} = srctype;
+}
+
 // jump instructions
 class JMP_RR<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
     : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
@@ -216,6 +225,18 @@ class JMP_RI<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
   let BPFClass = BPF_JMP;
 }
 
+class JMP_IND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
+    : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
+                   (outs),
+                   (ins GPR:$dst),
+                   !strconcat(OpcodeStr, " $dst"),
+                   Pattern> {
+  bits<4> dst;
+
+  let Inst{51-48} = dst;
+  let BPFClass = BPF_JMP;
+}
+
 class JMP_JCOND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
     : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
                    (outs),
@@ -281,6 +302,10 @@ defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
 defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
 defm JSET : J<BPF_JSET, "&", NoCond, NoCond>;
 def JCOND : JMP_JCOND<BPF_JCOND, "may_goto", []>;
+
+let isIndirectBranch = 1 in {
+  def JX : JMP_IND<BPF_JA, "gotox", [(brind i64:$dst)]>;
+}
 }
 
 // ALU instructions
@@ -851,6 +876,8 @@ let usesCustomInserter = 1, isCodeGenOnly = 1 in {
 // load 64-bit global addr into register
 def : Pat<(BPFWrapper tglobaladdr:$in), (LD_imm64 tglobaladdr:$in)>;
 def : Pat<(BPFWrapper tconstpool:$in), (LD_imm64 tconstpool:$in)>;
+def : Pat<(BPFWrapper tblockaddress:$in), (LD_imm64 tblockaddress:$in)>;
+def : Pat<(BPFWrapper tjumptable:$in), (LD_imm64 tjumptable:$in)>;
 
 // 0xffffFFFF doesn't fit into simm32, optimize common case
 def : Pat<(i64 (and (i64 GPR:$src), 0xffffFFFF)),
diff --git a/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
index 040a1fb750702..164d172c241c8 100644
--- a/llvm/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
@@ -77,6 +77,9 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     case MachineOperand::MO_ConstantPoolIndex:
       MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
       break;
+    case MachineOperand::MO_JumpTableIndex:
+      MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
+      break;
     }
 
     OutMI.addOperand(MCOp);

>From 7f055d99937ff2d27f2d91bcc6f5b1eef6370782 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Wed, 2 Apr 2025 10:27:33 -0700
Subject: [PATCH 2/3] Fix an assert error

---
 llvm/lib/Target/BPF/BPFInstrInfo.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 70bc163615f61..e61aa62c88f26 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -181,6 +181,10 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     if (!isUnpredicatedTerminator(*I))
       break;
 
+    // If a JX insn, we're done.
+    if (I->getOpcode() == BPF::JX)
+      break;
+
     // A terminator that isn't a branch can't easily be handled
     // by this analysis.
     if (!I->isBranch())

>From d6f7a1a6252c7f8ece92ff612418010dea90896e Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Wed, 2 Apr 2025 11:12:15 -0700
Subject: [PATCH 3/3] Generate .llvm_jump_table_sizes section

For example,
  [ 6] .rodata           PROGBITS        0000000000000000 000740 0000d6 00   A  0   0  8
  [ 7] .rel.rodata       REL             0000000000000000 003860 000080 10   I 39   6  8
  [ 8] .llvm_jump_table_sizes LLVM_JT_SIZES 0000000000000000 000816 000010 00      0   0  1
  [ 9] .rel.llvm_jump_table_sizes REL    0000000000000000 0038e0 000010 10   I 39   8  8
  ...
  [14] .llvm_jump_table_sizes LLVM_JT_SIZES 0000000000000000 000958 000010 00      0   0  1
  [15] .rel.llvm_jump_table_sizes REL    0000000000000000 003970 000010 10   I 39  14  8

With llvm-readelf dump section 8 and 14:
  $ llvm-readelf -x 8 user_ringbuf_success.bpf.o
  Hex dump of section '.llvm_jump_table_sizes':
  0x00000000 00000000 00000000 04000000 00000000 ................
  $ llvm-readelf -x 14 user_ringbuf_success.bpf.o
  Hex dump of section '.llvm_jump_table_sizes':
  0x00000000 20000000 00000000 04000000 00000000  ...............
You can see there are two jump tables:
  jump table 1: offset 0, size 4 (4 labels)
  jump table 2: offset 0x20, size 4 (4 labels)

Checking sections 9 and 15, we can find the corresponding relocations:
  Relocation section '.rel.llvm_jump_table_sizes' at offset 0x38e0 contains 1 entries:
      Offset             Info             Type               Symbol's Value  Symbol's Name
  0000000000000000  0000000a00000002 R_BPF_64_ABS64         0000000000000000 .rodata
  Relocation section '.rel.llvm_jump_table_sizes' at offset 0x3970 contains 1 entries:
      Offset             Info             Type               Symbol's Value  Symbol's Name
  0000000000000000  0000000a00000002 R_BPF_64_ABS64         0000000000000000 .rodata
and confirmed that the relocation is against '.rodata'.

Dump .rodata section:
  0x00000000 a8000000 00000000 10010000 00000000 ................
  0x00000010 b8000000 00000000 c8000000 00000000 ................
  0x00000020 28040000 00000000 00050000 00000000 (...............
  0x00000030 70040000 00000000 b8040000 00000000 p...............
  0x00000040 44726169 6e207265 7475726e 65643a20 Drain returned:

So we can get two jump tables:
  .rodata offset 0, # of labels 4:
  0x00000000 a8000000 00000000 10010000 00000000 ................
  0x00000010 b8000000 00000000 c8000000 00000000 ................
  .rodata offset 0x20, # of labels 4:
  0x00000020 28040000 00000000 00050000 00000000 (...............
  0x00000030 70040000 00000000 b8040000 00000000 p...............

This way, you just need to scan the related code section. As long as it
matches one of the jump tables (.rodata relocation, offset also matching),
you do not need to care about gotox at all in libbpf.
---
 llvm/include/llvm/CodeGen/AsmPrinter.h     | 2 ++
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +-
 llvm/lib/Target/BPF/BPFAsmPrinter.cpp      | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 16363fbaa4f9a..265ebf483e1ac 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstdint>
 #include <memory>
@@ -33,6 +34,7 @@
 #include <vector>
 
 namespace llvm {
+extern cl::opt<bool> EmitJumpTableSizesSection;
 
 class AddrLabelMap;
 class AsmPrinterHandler;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 0deaf94502b11..b22a8be6baaa2 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -168,7 +168,7 @@ static cl::opt<bool> BBAddrMapSkipEmitBBEntries(
              "unnecessary for some PGOAnalysisMap features."),
     cl::Hidden, cl::init(false));
 
-static cl::opt<bool> EmitJumpTableSizesSection(
+cl::opt<bool> llvm::EmitJumpTableSizesSection(
     "emit-jump-table-sizes-section",
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index b3c27a3d1d6fa..af7a21c694a11 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -55,6 +55,7 @@ class BPFAsmPrinter : public AsmPrinter {
 } // namespace
 
 bool BPFAsmPrinter::doInitialization(Module &M) {
+  EmitJumpTableSizesSection = true;
   AsmPrinter::doInitialization(M);
 
   // Only emit BTF when debuginfo available.