[llvm] [BPF] Support Jump Table (PR #149715)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 27 09:19:47 PDT 2025


https://github.com/yonghong-song updated https://github.com/llvm/llvm-project/pull/149715

>From 6766b8b8f8597b920b86f596743150475dd320be Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Mon, 31 Mar 2025 21:25:26 -0700
Subject: [PATCH 1/4] [BPF] Add jump table support with switch statements and
 computed goto

NOTE 1: We probably need cpu v5 or other flags to enable this feature.
We can add it later when necessary. Let us use cpu v4 for now.
NOTE 2: An option -bpf-min-jump-table-entries is implemented to control the minimum
number of entries required to use a jump table on BPF. The default value is 5, chosen
to make testing easy. Eventually we will increase the minimum number of jump table entries to 13.

This patch adds jump table support. A new insn 'gotox <reg>' is
added to allow goto through a register. The register represents
the address in the current section.

Example 1 (switch statement):
=============================

Code:
struct simple_ctx {
        int x;
        int y;
        int z;
};

int ret_user, ret_user2;
void bar(void);
int foo(struct simple_ctx *ctx, struct simple_ctx *ctx2)
{
        switch (ctx->x) {
        case 1: ret_user = 18; break;
        case 20: ret_user = 6; break;
        case 16: ret_user = 9; break;
        case 6: ret_user = 16; break;
        case 8: ret_user = 14; break;
        case 30: ret_user = 2; break;
        default: ret_user = 1; break;
        }

        bar();

        switch (ctx2->x) {
        case 0: ret_user2 = 8; break;
        case 31: ret_user2 = 5; break;
        case 13: ret_user2 = 8; break;
        case 1: ret_user2 = 3; break;
        case 11: ret_user2 = 4; break;
        default: ret_user2 = 29; break;
        }

        return 0;
}

Run: clang --target=bpf -mcpu=v4 -O2 -S test.c
The assembly code:
        ...
    # %bb.1:                                # %entry
        r1 <<= 3
        r2 = .LJTI0_0 ll
        r2 += r1
        r1 = *(u64 *)(r2 + 0)
        gotox r1
    LBB0_2:
        w1 = 18
        goto LBB0_9
        ...
    # %bb.10:                               # %sw.epilog
        r1 <<= 3
        r2 = .LJTI0_1 ll
        r2 += r1
        r1 = *(u64 *)(r2 + 0)
        gotox r1
    LBB0_11:
        w1 = 8
        goto LBB0_16
        ...
        .section        .rodata,"a", at progbits
        .p2align        3, 0x0
    .LJTI0_0:
        .quad   LBB0_2
        .quad   LBB0_8
        ...
        .quad   LBB0_7
    .LJTI0_1:
        .quad   LBB0_11
        .quad   LBB0_13
         ...

Although we do have labels .LJTI0_0 and .LJTI0_1, since they have the '.L'
prefix they won't appear in the .o file's symbol table like other symbols.

Run: llvm-objdump -Sr test.o
       ...
       4:       67 01 00 00 03 00 00 00 r1 <<= 0x3
       5:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                0000000000000028:  R_BPF_64_64  .rodata
       7:       0f 12 00 00 00 00 00 00 r2 += r1
       ...
      29:       67 01 00 00 03 00 00 00 r1 <<= 0x3
      30:       18 02 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 r2 = 0xf0 ll
                00000000000000f0:  R_BPF_64_64  .rodata
      32:       0f 12 00 00 00 00 00 00 r2 += r1

The size of each jump table is not obvious. libbpf needs to check all relocations
against the .rodata section in order to get a precise size for constructing bpf
maps.

Example 2 (Simple computed goto):
=================================

Code:
     int bar(int a) {
        __label__ l1, l2;
        void * volatile tgt;
        int ret = 0;
        if (a)
          tgt = &&l1; // synthetic jump table generated here
        else
          tgt = &&l2; // another synthetic jump table
        goto *tgt;
    l1: ret += 1;
    l2: ret += 2;
        return ret;
      }

Compile: clang --target=bpf -mcpu=v4 -O2 -c test1.c
Objdump: llvm-objdump -Sr test1.o

       0:       18 02 00 00 50 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x50 ll
                0000000000000000:  R_BPF_64_64  .text
       2:       16 01 02 00 00 00 00 00 if w1 == 0x0 goto +0x2 <bar+0x28>
       3:       18 02 00 00 40 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x40 ll
                0000000000000018:  R_BPF_64_64  .text
       5:       7b 2a f8 ff 00 00 00 00 *(u64 *)(r10 - 0x8) = r2
       6:       79 a1 f8 ff 00 00 00 00 r1 = *(u64 *)(r10 - 0x8)
       7:       0d 01 00 00 00 00 00 00 gotox r1
       8:       b4 00 00 00 03 00 00 00 w0 = 0x3
       9:       05 00 01 00 00 00 00 00 goto +0x1 <bar+0x58>
      10:       b4 00 00 00 02 00 00 00 w0 = 0x2
      11:       95 00 00 00 00 00 00 00 exit

For this case, there is no jump table, so it would be hard to track the offset
during verification, especially when the offset needs adjustment. So practically
we need to create two jump tables, one for '&&l1' and one for '&&l2'.

Example 3 (More complicated computed goto):
===========================================

Code:
  int foo(int a, int b) {
    __label__ l1, l2, l3, l4;
    void *jt1[] = {[0]=&&l1, [1]=&&l2};
    void *jt2[] = {[0]=&&l3, [1]=&&l4};
    int ret = 0;

    goto *jt1[a % 2];
    l1: ret += 1;
    l2: ret += 3;
    goto *jt2[b % 2];
    l3: ret += 5;
    l4: ret += 7;
    return ret;
  }

Compile: clang --target=bpf -mcpu=v4 -O2 -S test2.c
Asm code:
        ...
        r3 = (s32)r2
        r3 <<= 3
        r2 = .L__const.foo.jt2 ll
        r2 += r3
        r1 = (s32)r1
        r1 <<= 3
        r3 = .L__const.foo.jt1 ll
        r3 += r1
        w0 = 0
        r1 = *(u64 *)(r3 + 0)
        gotox r1
    .Ltmp0:                                 # Block address taken
    LBB0_1:                                 # %l1
                                        # =>This Inner Loop Header: Depth=1
        w0 += 1
        w0 += 3
        r1 = *(u64 *)(r2 + 0)
        gotox r1
    .Ltmp1:                                 # Block address taken
    LBB0_2:                                 # %l2
        ...
        .type   .L__const.foo.jt1, at object       # @__const.foo.jt1
        .section        .rodata,"a", at progbits
        .p2align        3, 0x0
    .L__const.foo.jt1:
        .quad   .Ltmp0
        .quad   .Ltmp1
        .size   .L__const.foo.jt1, 16

        .type   .L__const.foo.jt2, at object       # @__const.foo.jt2
        .p2align        3, 0x0
    .L__const.foo.jt2:
        .quad   .Ltmp2
        .quad   .Ltmp3
        .size   .L__const.foo.jt2, 16

Similar to switch statement case, for the binary, the symbols
.L__const.foo.jt* will not show up in the symbol table and jump table
will be in .rodata section.

We need to resolve the Example 2 case.

Also, with more libbpf work (dealing with .rodata sections etc.),
everything should work fine for Examples 1 and 3. But we could do
better by
  - Replacing symbols like .L<...> with symbols appearing in the
    symbol table.
  - Adding jump tables to a .jumptables section instead of the .rodata section.
This should make things easier for libbpf. Users can also benefit
from this as relocations/sections will be easy to check.

Next two patches will fix Example 2 and improve all of them as
mentioned in the above.
---
 .../lib/Target/BPF/AsmParser/BPFAsmParser.cpp |  1 +
 llvm/lib/Target/BPF/BPFISelLowering.cpp       | 38 +++++++++++++++-
 llvm/lib/Target/BPF/BPFISelLowering.h         |  4 ++
 llvm/lib/Target/BPF/BPFInstrInfo.cpp          | 45 +++++++++++++++++++
 llvm/lib/Target/BPF/BPFInstrInfo.h            |  3 ++
 llvm/lib/Target/BPF/BPFInstrInfo.td           | 18 ++++++++
 llvm/lib/Target/BPF/BPFMCInstLower.cpp        |  7 +++
 7 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index a347794a9a30c..d96f403d2f814 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -234,6 +234,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("callx", true)
         .Case("goto", true)
         .Case("gotol", true)
+        .Case("gotox", true)
         .Case("may_goto", true)
         .Case("*", true)
         .Case("exit", true)
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index f4f414d192df0..0afa2790d5173 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -38,6 +39,10 @@ static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
   cl::Hidden, cl::init(false),
   cl::desc("Expand memcpy into load/store pairs in order"));
 
+static cl::opt<unsigned> BPFMinimumJumpTableEntries(
+    "bpf-min-jump-table-entries", cl::init(5), cl::Hidden,
+    cl::desc("Set minimum number of entries to use a jump table on BPF"));
+
 static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                  SDValue Val = {}) {
   std::string Str;
@@ -67,12 +72,12 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
-  setOperationAction(ISD::BRIND, MVT::Other, Expand);
   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 
   setOperationAction(ISD::TRAP, MVT::Other, Custom);
 
   setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);
+  setOperationAction({ISD::JumpTable, ISD::BlockAddress}, MVT::i64, Custom);
 
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -159,6 +164,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
   setBooleanContents(ZeroOrOneBooleanContent);
   setMaxAtomicSizeInBitsSupported(64);
+  setMinimumJumpTableEntries(BPFMinimumJumpTableEntries);
 
   // Function alignments
   setMinFunctionAlignment(Align(8));
@@ -246,6 +252,10 @@ bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
   return TargetLoweringBase::isZExtFree(Val, VT2);
 }
 
+unsigned BPFTargetLowering::getJumpTableEncoding() const {
+  return MachineJumpTableInfo::EK_BlockAddress;
+}
+
 BPFTargetLowering::ConstraintType
 BPFTargetLowering::getConstraintType(StringRef Constraint) const {
   if (Constraint.size() == 1) {
@@ -316,10 +326,14 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
   case ISD::BR_CC:
     return LowerBR_CC(Op, DAG);
+  case ISD::JumpTable:
+    return LowerJumpTable(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
   case ISD::ConstantPool:
     return LowerConstantPool(Op, DAG);
+  case ISD::BlockAddress:
+    return LowerBlockAddress(Op, DAG);
   case ISD::SELECT_CC:
     return LowerSELECT_CC(Op, DAG);
   case ISD::SDIV:
@@ -780,6 +794,11 @@ SDValue BPFTargetLowering::LowerTRAP(SDValue Op, SelectionDAG &DAG) const {
   return LowerCall(CLI, InVals);
 }
 
+SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+  return getAddr(N, DAG);
+}
+
 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((BPFISD::NodeType)Opcode) {
   case BPFISD::FIRST_NUMBER:
@@ -811,6 +830,17 @@ static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                                    N->getOffset(), Flags);
 }
 
+static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
+                                   Flags);
+}
+
+static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
+}
+
 template <class NodeTy>
 SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                    unsigned Flags) const {
@@ -837,6 +867,12 @@ SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
   return getAddr(N, DAG);
 }
 
+SDValue BPFTargetLowering::LowerBlockAddress(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
+  return getAddr(N, DAG);
+}
+
 unsigned
 BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                  unsigned Reg, bool isSigned) const {
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 8f60261c10e9e..bd08840cec458 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -66,6 +66,8 @@ class BPFTargetLowering : public TargetLowering {
 
   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
 
+  unsigned getJumpTableEncoding() const override;
+
 private:
   // Control Instruction Selection Features
   bool HasAlu32;
@@ -81,6 +83,8 @@ class BPFTargetLowering : public TargetLowering {
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerTRAP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
 
   template <class NodeTy>
   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 70bc163615f61..bf2b4213201d7 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -181,6 +181,11 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     if (!isUnpredicatedTerminator(*I))
       break;
 
+    // From base method doc: ... returning true if it cannot be understood ...
+    // Indirect branch has multiple destinations and no true/false concepts.
+    if (I->isIndirectBranch())
+      return true;
+
     // A terminator that isn't a branch can't easily be handled
     // by this analysis.
     if (!I->isBranch())
@@ -259,3 +264,43 @@ unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB,
 
   return Count;
 }
+
+int BPFInstrInfo::getJumpTableIndex(const MachineInstr &MI) const {
+  if (MI.getOpcode() != BPF::JX)
+    return -1;
+
+  // The pattern looks like:
+  // %0 = LD_imm64 %jump-table.0   ; load jump-table address
+  // %1 = ADD_rr %0, $another_reg  ; address + offset
+  // %2 = LDD %1, 0                ; load the actual label
+  // JX %2
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  Register Reg = MI.getOperand(0).getReg();
+  if (!Reg.isVirtual())
+    return -1;
+  MachineInstr *Ldd = MRI.getUniqueVRegDef(Reg);
+  if (Ldd == nullptr || Ldd->getOpcode() != BPF::LDD)
+    return -1;
+
+  Reg = Ldd->getOperand(1).getReg();
+  if (!Reg.isVirtual())
+    return -1;
+  MachineInstr *Add = MRI.getUniqueVRegDef(Reg);
+  if (Add == nullptr || Add->getOpcode() != BPF::ADD_rr)
+    return -1;
+
+  Reg = Add->getOperand(1).getReg();
+  if (!Reg.isVirtual())
+    return -1;
+  MachineInstr *LDimm64 = MRI.getUniqueVRegDef(Reg);
+  if (LDimm64 == nullptr || LDimm64->getOpcode() != BPF::LD_imm64)
+    return -1;
+
+  const MachineOperand &MO = LDimm64->getOperand(1);
+  if (!MO.isJTI())
+    return -1;
+
+  return MO.getIndex();
+}
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.h b/llvm/lib/Target/BPF/BPFInstrInfo.h
index d8bbad44e314e..d88e37975980a 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.h
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -58,6 +58,9 @@ class BPFInstrInfo : public BPFGenInstrInfo {
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                         const DebugLoc &DL,
                         int *BytesAdded = nullptr) const override;
+
+  int getJumpTableIndex(const MachineInstr &MI) const override;
+
 private:
   void expandMEMCPY(MachineBasicBlock::iterator) const;
 
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index b21f1a0eee3b0..d2311defa5c7e 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -216,6 +216,18 @@ class JMP_RI<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
   let BPFClass = BPF_JMP;
 }
 
+class JMP_IND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
+    : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
+                   (outs),
+                   (ins GPR:$dst),
+                   !strconcat(OpcodeStr, " $dst"),
+                   Pattern> {
+  bits<4> dst;
+
+  let Inst{51-48} = dst;
+  let BPFClass = BPF_JMP;
+}
+
 class JMP_JCOND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
     : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
                    (outs),
@@ -281,6 +293,10 @@ defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
 defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
 defm JSET : J<BPF_JSET, "&", NoCond, NoCond>;
 def JCOND : JMP_JCOND<BPF_JCOND, "may_goto", []>;
+
+let isIndirectBranch = 1, isBarrier = 1 in {
+  def JX : JMP_IND<BPF_JA, "gotox", [(brind i64:$dst)]>;
+}
 }
 
 // ALU instructions
@@ -851,6 +867,8 @@ let usesCustomInserter = 1, isCodeGenOnly = 1 in {
 // load 64-bit global addr into register
 def : Pat<(BPFWrapper tglobaladdr:$in), (LD_imm64 tglobaladdr:$in)>;
 def : Pat<(BPFWrapper tconstpool:$in), (LD_imm64 tconstpool:$in)>;
+def : Pat<(BPFWrapper tblockaddress:$in), (LD_imm64 tblockaddress:$in)>;
+def : Pat<(BPFWrapper tjumptable:$in), (LD_imm64 tjumptable:$in)>;
 
 // 0xffffFFFF doesn't fit into simm32, optimize common case
 def : Pat<(i64 (and (i64 GPR:$src), 0xffffFFFF)),
diff --git a/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
index 040a1fb750702..e67b99a813945 100644
--- a/llvm/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
@@ -77,6 +77,13 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     case MachineOperand::MO_ConstantPoolIndex:
       MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
       break;
+    case MachineOperand::MO_JumpTableIndex:
+      MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
+      break;
+    case MachineOperand::MO_BlockAddress:
+      MCOp = LowerSymbolOperand(
+          MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
+      break;
     }
 
     OutMI.addOperand(MCOp);

>From 2009ea22adc555eb0d615ab3d4ce5c83a389aca0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Fri, 25 Jul 2025 23:37:55 -0700
Subject: [PATCH 2/4] [BPF] Lower computed goto and generate proper jump tables

Example 2, Asm code:
        ...
    # %bb.0:                                # %entry
        r2 = .LJTI0_0 ll
        r2 = *(u64 *)(r2 + 0)
        r3 = .LJTI0_1 ll
        r3 = *(u64 *)(r3 + 0)
        if w1 == 0 goto LBB0_2
    # %bb.1:                                # %entry
        r3 = r2
    LBB0_2:                                 # %entry
        *(u64 *)(r10 - 8) = r3
        r1 = *(u64 *)(r10 - 8)
        gotox r1
    .Ltmp0:                                 # Block address taken
    LBB0_3:                                 # %l1
        w0 = 3
        goto LBB0_5
    .Ltmp1:                                 # Block address taken
    LBB0_4:                                 # %l2
        w0 = 2
    LBB0_5:                                 # %.split
        exit
        ...
        .section        .rodata,"a", at progbits
        .p2align        3, 0x0
    .LJTI0_0:
        .quad   LBB0_3
    .LJTI0_1:
        .quad   LBB0_4

Example 3, Asm Code:

        r3 = (s32)r2
        r3 <<= 3
        r2 = .LJTI0_0 ll
        r2 += r3
        r1 = (s32)r1
        r1 <<= 3
        r3 = .LJTI0_1 ll
        r3 += r1
        w0 = 0
        r1 = *(u64 *)(r3 + 0)
        gotox r1
    .Ltmp0:                                 # Block address taken
    LBB0_1:                                 # %l1
                                            # =>This Inner Loop Header: Depth=1
        w0 += 1
        w0 += 3
        r1 = *(u64 *)(r2 + 0)
        gotox r1
    .Ltmp1:                                 # Block address taken
    LBB0_2:                                 # %l2
                                            # =>This Inner Loop Header: Depth=1
        w0 += 3
        r1 = *(u64 *)(r2 + 0)
        gotox r1
    .Ltmp2:                                 # Block address taken
    LBB0_3:                                 # %l3
        w0 += 5
        goto LBB0_5
    .Ltmp3:                                 # Block address taken
    LBB0_4:                                 # %l4
    LBB0_5:                                 # %.split17
        w0 += 7
        exit
        ...
        .section        .rodata,"a", at progbits
        .p2align        3, 0x0
    .LJTI0_0:
        .quad   LBB0_3
        .quad   LBB0_4
    .LJTI0_1:
        .quad   LBB0_1
        .quad   LBB0_2
                                        # -- End function
        .type   .L__const.foo.jt1, at object       # @__const.foo.jt1
        .p2align        3, 0x0
    .L__const.foo.jt1:
        .quad   .Ltmp0
        .quad   .Ltmp1
        .size   .L__const.foo.jt1, 16

        .type   .L__const.foo.jt2, at object       # @__const.foo.jt2
        .p2align        3, 0x0
    .L__const.foo.jt2:
        .quad   .Ltmp2
        .quad   .Ltmp3
        .size   .L__const.foo.jt2, 16

Note that for both above examples, the jump table section is '.rodata'
and labels have '.L' prefix which means labels won't show up in the
symbol table. As mentioned in previous patch, we want to
  - Move jump tables to '.jumptables' section
  - Rename '.L*' labels with proper labels which are visible in symbol table.

Note that for Example 3, there are extra global objects like
  .L__const.foo.jt1 and .L__const.foo.jt2
and we are not able to remove them. But they won't show up in the symbol
table either.
---
 llvm/lib/Target/BPF/BPFISelLowering.cpp | 117 +++++++++++++++++++++---
 llvm/lib/Target/BPF/BPFISelLowering.h   |   4 +-
 llvm/lib/Target/BPF/BPFInstrInfo.td     |   7 +-
 3 files changed, 110 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 0afa2790d5173..8fa2ac808316f 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -819,23 +819,12 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
   return nullptr;
 }
 
-static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
-                             SelectionDAG &DAG, unsigned Flags) {
-  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
-}
-
 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                              SelectionDAG &DAG, unsigned Flags) {
   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                    N->getOffset(), Flags);
 }
 
-static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
-                             SelectionDAG &DAG, unsigned Flags) {
-  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
-                                   Flags);
-}
-
 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
                              SelectionDAG &DAG, unsigned Flags) {
   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
@@ -857,7 +846,15 @@ SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
   if (N->getOffset() != 0)
     report_fatal_error("invalid offset for global address: " +
                        Twine(N->getOffset()));
-  return getAddr(N, DAG);
+
+  const GlobalValue *GVal = N->getGlobal();
+  SDLoc DL(Op);
+
+  // Wrap it in a TargetGlobalAddress
+  SDValue Addr = DAG.getTargetGlobalAddress(GVal, DL, MVT::i64);
+
+  // Emit pseudo instruction
+  return SDValue(DAG.getMachineNode(BPF::LDIMM64, DL, MVT::i64, Addr), 0);
 }
 
 SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
@@ -869,8 +866,14 @@ SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
 
 SDValue BPFTargetLowering::LowerBlockAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
-  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
-  return getAddr(N, DAG);
+  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+  SDLoc DL(Op);
+
+  // Wrap it in a TargetBlockAddress
+  SDValue Addr = DAG.getTargetBlockAddress(BA, MVT::i64);
+
+  // Emit pseudo instruction
+  return SDValue(DAG.getMachineNode(BPF::LDIMM64, DL, MVT::i64, Addr), 0);
 }
 
 unsigned
@@ -936,6 +939,86 @@ BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
   return BB;
 }
 
+MachineBasicBlock *BPFTargetLowering::EmitInstrWithCustomInserterLDimm64(
+    MachineInstr &MI, MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  DebugLoc DL = MI.getDebugLoc();
+
+  // Build address taken map for Global Variables and BlockAddresses
+  DenseMap<const BasicBlock *, MachineBasicBlock *> AddressTakenBBs;
+  for (MachineBasicBlock &MBB : *MF) {
+    if (const BasicBlock *BB = MBB.getBasicBlock())
+      if (BB->hasAddressTaken())
+        AddressTakenBBs[BB] = &MBB;
+  }
+
+  MachineOperand &MO = MI.getOperand(1);
+  assert(MO.isBlockAddress() || MO.isGlobal());
+
+  MCRegister ResultReg = MI.getOperand(0).getReg();
+  Register TmpReg = RegInfo.createVirtualRegister(RC);
+
+  std::vector<MachineBasicBlock *> Targets;
+  unsigned JTI;
+
+  if (MO.isBlockAddress()) {
+    auto *BA = MO.getBlockAddress();
+    MachineBasicBlock *TgtMBB = AddressTakenBBs[BA->getBasicBlock()];
+    assert(TgtMBB);
+
+    Targets.push_back(TgtMBB);
+    JTI = MF->getOrCreateJumpTableInfo(getJumpTableEncoding())
+              ->createJumpTableIndex(Targets);
+
+    BuildMI(*BB, MI, DL, TII->get(BPF::LD_imm64), TmpReg)
+        .addJumpTableIndex(JTI);
+    BuildMI(*BB, MI, DL, TII->get(BPF::LDD), ResultReg)
+        .addReg(TmpReg)
+        .addImm(0);
+    MI.eraseFromParent();
+    return BB;
+  }
+
+  // Helper: emit LD_imm64 with operand GlobalAddress or JumpTable
+  auto emitLDImm64 = [&](const GlobalValue *GV = nullptr, unsigned JTI = -1) {
+    auto MIB = BuildMI(*BB, MI, DL, TII->get(BPF::LD_imm64), ResultReg);
+    if (GV)
+      MIB.addGlobalAddress(GV);
+    else
+      MIB.addJumpTableIndex(JTI);
+    MI.eraseFromParent();
+    return BB;
+  };
+
+  // Must be a global at this point
+  const GlobalValue *GVal = MO.getGlobal();
+  const auto *GV = dyn_cast<GlobalVariable>(GVal);
+
+  if (!GV || GV->getLinkage() != GlobalValue::PrivateLinkage ||
+      !GV->isConstant() || !GV->hasInitializer())
+    return emitLDImm64(GVal);
+
+  const auto *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA)
+    return emitLDImm64(GVal);
+
+  for (const Use &Op : CA->operands()) {
+    if (!isa<BlockAddress>(Op))
+      return emitLDImm64(GVal);
+    auto *BA = cast<BlockAddress>(Op);
+    MachineBasicBlock *TgtMBB = AddressTakenBBs[BA->getBasicBlock()];
+    assert(TgtMBB);
+    Targets.push_back(TgtMBB);
+  }
+
+  JTI = MF->getOrCreateJumpTableInfo(getJumpTableEncoding())
+            ->createJumpTableIndex(Targets);
+  return emitLDImm64(nullptr, JTI);
+}
+
 MachineBasicBlock *
 BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
@@ -948,6 +1031,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                        Opc == BPF::Select_32_64);
 
   bool isMemcpyOp = Opc == BPF::MEMCPY;
+  bool isLDimm64Op = Opc == BPF::LDIMM64;
 
 #ifndef NDEBUG
   bool isSelectRIOp = (Opc == BPF::Select_Ri ||
@@ -955,13 +1039,16 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                        Opc == BPF::Select_Ri_32 ||
                        Opc == BPF::Select_Ri_32_64);
 
-  if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
+  if (!(isSelectRROp || isSelectRIOp || isMemcpyOp || isLDimm64Op))
     report_fatal_error("unhandled instruction type: " + Twine(Opc));
 #endif
 
   if (isMemcpyOp)
     return EmitInstrWithCustomInserterMemcpy(MI, BB);
 
+  if (isLDimm64Op)
+    return EmitInstrWithCustomInserterLDimm64(MI, BB);
+
   bool is32BitCmp = (Opc == BPF::Select_32 ||
                      Opc == BPF::Select_32_64 ||
                      Opc == BPF::Select_Ri_32 ||
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index bd08840cec458..5243d4944667d 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -167,7 +167,9 @@ class BPFTargetLowering : public TargetLowering {
   MachineBasicBlock * EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                         MachineBasicBlock *BB)
                                                         const;
-
+  MachineBasicBlock *
+  EmitInstrWithCustomInserterLDimm64(MachineInstr &MI,
+                                     MachineBasicBlock *BB) const;
 };
 }
 
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index d2311defa5c7e..f7e6411883ccf 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -865,9 +865,7 @@ let usesCustomInserter = 1, isCodeGenOnly = 1 in {
 }
 
 // load 64-bit global addr into register
-def : Pat<(BPFWrapper tglobaladdr:$in), (LD_imm64 tglobaladdr:$in)>;
 def : Pat<(BPFWrapper tconstpool:$in), (LD_imm64 tconstpool:$in)>;
-def : Pat<(BPFWrapper tblockaddress:$in), (LD_imm64 tblockaddress:$in)>;
 def : Pat<(BPFWrapper tjumptable:$in), (LD_imm64 tjumptable:$in)>;
 
 // 0xffffFFFF doesn't fit into simm32, optimize common case
@@ -1390,3 +1388,8 @@ let usesCustomInserter = 1, isCodeGenOnly = 1 in {
       "#memcpy dst: $dst, src: $src, len: $len, align: $align",
       [(BPFmemcpy GPR:$dst, GPR:$src, imm:$len, imm:$align)]>;
 }
+
+// For GlobalValue and BlockAddress.
+let usesCustomInserter = 1, isCodeGenOnly = 1 in {
+  def LDIMM64 : Pseudo<(outs GPR:$dst), (ins i64imm:$addr), "", []>;
+}

>From db20e468a9daf4e6bccb8401e7627edc30fd2717 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Fri, 18 Jul 2025 19:52:13 -0700
Subject: [PATCH 3/4] [BPF] Use visible symbols and dedicated jump tables

For jumptables from switch statements, generate 'llvm-readelf -s' visible
symbols and put jumptables into a dedicated section. Most work from
Eduard.

For the previous example 1,
Compile: clang --target=bpf -mcpu=v4 -O2 -S test.c
Asm code:
        ...
    # %bb.1:                                # %entry
        r1 <<= 3
        r2 = BPF.JT.0.0 ll
        r2 += r1
        r1 = *(u64 *)(r2 + 0)
        gotox r1
    LBB0_2:
        w1 = 18
        goto LBB0_9
        ...
    # %bb.10:                               # %sw.epilog
        r1 <<= 3
        r2 = BPF.JT.0.1 ll
        r2 += r1
        r1 = *(u64 *)(r2 + 0)
        gotox r1
    LBB0_11:
        w1 = 8
        goto LBB0_16
        ...
        .section        .jumptables,"",@progbits
    BPF.JT.0.0:
        .quad   LBB0_2
        .quad   LBB0_8
        ...
        .quad   LBB0_7
        .size   BPF.JT.0.0, 240
    BPF.JT.0.1:
        .quad   LBB0_11
        .quad   LBB0_13
        ...
        .quad   LBB0_12
        .size   BPF.JT.0.1, 256

And symbols BPF.JT.0.{0,1} will be in symbol table.
The final binary:
       4:       67 01 00 00 03 00 00 00 r1 <<= 0x3
       5:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                0000000000000028:  R_BPF_64_64  BPF.JT.0.0
       7:       0f 12 00 00 00 00 00 00 r2 += r1
       ...
      29:       67 01 00 00 03 00 00 00 r1 <<= 0x3
      30:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                00000000000000f0:  R_BPF_64_64  BPF.JT.0.1
      32:       0f 12 00 00 00 00 00 00 r2 += r1
       ...
Symbol table:
     4: 0000000000000000   240 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.0
     5: 0000000000000000     4 OBJECT  GLOBAL DEFAULT     6 ret_user
     6: 0000000000000000     0 NOTYPE  GLOBAL DEFAULT   UND bar
     7: 00000000000000f0   256 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.1
and
  [ 4] .jumptables       PROGBITS        0000000000000000 0001c8 0001f0 00      0   0  1

For the previous example 2,
    Compile: clang --target=bpf -mcpu=v4 -O2 -S test1.c
    Asm code:
        ...
    # %bb.0:                                # %entry
        r2 = BPF.JT.0.0 ll
        r2 = *(u64 *)(r2 + 0)
        r3 = BPF.JT.0.1 ll
        r3 = *(u64 *)(r3 + 0)
        if w1 == 0 goto LBB0_2
    # %bb.1:                                # %entry
        r3 = r2
    LBB0_2:                                 # %entry
        *(u64 *)(r10 - 8) = r3
        r1 = *(u64 *)(r10 - 8)
        gotox r1
        ...
        .section        .jumptables,"",@progbits
    BPF.JT.0.0:
        .quad   LBB0_3
        .size   BPF.JT.0.0, 8
    BPF.JT.0.1:
        .quad   LBB0_4
        .size   BPF.JT.0.1, 8

    The binary: clang --target=bpf -mcpu=v4 -O2 -c test1.c

       0:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                0000000000000000:  R_BPF_64_64  BPF.JT.0.0
       2:       79 22 00 00 00 00 00 00 r2 = *(u64 *)(r2 + 0x0)
       3:       18 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r3 = 0x0 ll
                0000000000000018:  R_BPF_64_64  BPF.JT.0.1
       5:       79 33 00 00 00 00 00 00 r3 = *(u64 *)(r3 + 0x0)
       6:       16 01 01 00 00 00 00 00 if w1 == 0x0 goto +0x1 <bar+0x40>
       7:       bf 23 00 00 00 00 00 00 r3 = r2
       8:       7b 3a f8 ff 00 00 00 00 *(u64 *)(r10 - 0x8) = r3
       9:       79 a1 f8 ff 00 00 00 00 r1 = *(u64 *)(r10 - 0x8)
      10:       0d 01 00 00 00 00 00 00 gotox r1

       4: 0000000000000000     8 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.0
       5: 0000000000000008     8 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.1

      [ 4] .jumptables       PROGBITS        0000000000000000 0000b8 000010 00      0   0  1

For the previous example 3,
    Compile: clang --target=bpf -mcpu=v4 -O2 -S test2.c
    Asm code:
        ...
        r3 = (s32)r2
        r3 <<= 3
        r2 = BPF.JT.0.0 ll
        r2 += r3
        r1 = (s32)r1
        r1 <<= 3
        r3 = BPF.JT.0.1 ll
        r3 += r1
        w0 = 0
        r1 = *(u64 *)(r3 + 0)
        gotox r1
    .Ltmp0:                                 # Block address taken
    LBB0_1:                                 # %l1
                                            # =>This Inner Loop Header: Depth=1
        w0 += 1
        ...
        .section        .jumptables,"",@progbits
    BPF.JT.0.0:
        .quad   LBB0_3
        .quad   LBB0_4
        .size   BPF.JT.0.0, 16
    BPF.JT.0.1:
        .quad   LBB0_1
        .quad   LBB0_2
        .size   BPF.JT.0.1, 16

    The binary: clang --target=bpf -mcpu=v4 -O2 -c test2.c

          12:       bf 23 20 00 00 00 00 00 r3 = (s32)r2
          13:       67 03 00 00 03 00 00 00 r3 <<= 0x3
          14:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
                    0000000000000070:  R_BPF_64_64  BPF.JT.0.0
          16:       0f 32 00 00 00 00 00 00 r2 += r3
          17:       bf 11 20 00 00 00 00 00 r1 = (s32)r1
          18:       67 01 00 00 03 00 00 00 r1 <<= 0x3
          19:       18 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r3 = 0x0 ll
                    0000000000000098:  R_BPF_64_64  BPF.JT.0.1
          21:       0f 13 00 00 00 00 00 00 r3 += r1

         4: 0000000000000000    16 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.0
         5: 0000000000000010    16 OBJECT  GLOBAL DEFAULT     4 BPF.JT.0.1

      [ 4] .jumptables       PROGBITS        0000000000000000 000160 000020 00      0   0  1
---
 llvm/lib/Target/BPF/BPFAsmPrinter.cpp         | 75 +++++++++++++------
 llvm/lib/Target/BPF/BPFAsmPrinter.h           | 43 +++++++++++
 llvm/lib/Target/BPF/BPFMCInstLower.cpp        |  5 +-
 llvm/lib/Target/BPF/BPFMCInstLower.h          |  6 +-
 .../BPF/BPFTargetLoweringObjectFile.cpp       | 19 +++++
 .../Target/BPF/BPFTargetLoweringObjectFile.h  | 25 +++++++
 llvm/lib/Target/BPF/BPFTargetMachine.cpp      |  3 +-
 llvm/lib/Target/BPF/CMakeLists.txt            |  1 +
 8 files changed, 148 insertions(+), 29 deletions(-)
 create mode 100644 llvm/lib/Target/BPF/BPFAsmPrinter.h
 create mode 100644 llvm/lib/Target/BPF/BPFTargetLoweringObjectFile.cpp
 create mode 100644 llvm/lib/Target/BPF/BPFTargetLoweringObjectFile.h

diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index e3843e0e112e2..269d3b9566cde 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -11,52 +11,35 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "BPFAsmPrinter.h"
 #include "BPF.h"
 #include "BPFInstrInfo.h"
 #include "BPFMCInstLower.h"
 #include "BTFDebug.h"
 #include "MCTargetDesc/BPFInstPrinter.h"
 #include "TargetInfo/BPFTargetInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "asm-printer"
 
-namespace {
-class BPFAsmPrinter : public AsmPrinter {
-public:
-  explicit BPFAsmPrinter(TargetMachine &TM,
-                         std::unique_ptr<MCStreamer> Streamer)
-      : AsmPrinter(TM, std::move(Streamer), ID), BTF(nullptr) {}
-
-  StringRef getPassName() const override { return "BPF Assembly Printer"; }
-  bool doInitialization(Module &M) override;
-  void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
-  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       const char *ExtraCode, raw_ostream &O) override;
-  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                             const char *ExtraCode, raw_ostream &O) override;
-
-  void emitInstruction(const MachineInstr *MI) override;
-
-  static char ID;
-
-private:
-  BTFDebug *BTF;
-};
-} // namespace
-
 bool BPFAsmPrinter::doInitialization(Module &M) {
   AsmPrinter::doInitialization(M);
 
@@ -150,6 +133,50 @@ void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
+MCSymbol *BPFAsmPrinter::getJTPublicSymbol(unsigned JTI) {
+  SmallString<60> Name;
+  raw_svector_ostream(Name)
+      << "BPF.JT." << MF->getFunctionNumber() << '.' << JTI;
+  MCSymbol *S = OutContext.getOrCreateSymbol(Name);
+  if (auto *ES = dyn_cast<MCSymbolELF>(S)) {
+    ES->setBinding(ELF::STB_GLOBAL);
+    ES->setType(ELF::STT_OBJECT);
+  }
+  return S;
+}
+
+void BPFAsmPrinter::emitJumpTableInfo() {
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (!MJTI)
+    return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty())
+    return;
+
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  const Function &F = MF->getFunction();
+  MCSection *JTS = TLOF.getSectionForJumpTable(F, TM);
+  assert(MJTI->getEntryKind() == MachineJumpTableInfo::EK_BlockAddress);
+  unsigned EntrySize = MJTI->getEntrySize(getDataLayout());
+  OutStreamer->switchSection(JTS);
+  for (unsigned JTI = 0; JTI < JT.size(); JTI++) {
+    ArrayRef<MachineBasicBlock *> JTBBs = JT[JTI].MBBs;
+    if (JTBBs.empty())
+      continue;
+
+    MCSymbol *JTStart = getJTPublicSymbol(JTI);
+    OutStreamer->emitLabel(JTStart);
+    for (const MachineBasicBlock *MBB : JTBBs) {
+      const MCExpr *LHS = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+      OutStreamer->emitValue(LHS, EntrySize);
+    }
+    const MCExpr *JTSize =
+        MCConstantExpr::create(JTBBs.size() * EntrySize, OutContext);
+    OutStreamer->emitELFSize(JTStart, JTSize);
+  }
+}
+
 char BPFAsmPrinter::ID = 0;
 
 INITIALIZE_PASS(BPFAsmPrinter, "bpf-asm-printer", "BPF Assembly Printer", false,
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.h b/llvm/lib/Target/BPF/BPFAsmPrinter.h
new file mode 100644
index 0000000000000..3ae2dbc094540
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.h
@@ -0,0 +1,43 @@
+//===-- BPFAsmPrinter.h - BPF Assembly Printer ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFASMPRINTER_H
+#define LLVM_LIB_TARGET_BPF_BPFASMPRINTER_H
+
+#include "BTFDebug.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class BPFAsmPrinter : public AsmPrinter {
+public:
+  explicit BPFAsmPrinter(TargetMachine &TM,
+                         std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer), ID), BTF(nullptr) {}
+
+  StringRef getPassName() const override { return "BPF Assembly Printer"; }
+  bool doInitialization(Module &M) override;
+  void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &O) override;
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+                             const char *ExtraCode, raw_ostream &O) override;
+
+  void emitInstruction(const MachineInstr *MI) override;
+  MCSymbol *getJTPublicSymbol(unsigned JTI);
+  virtual void emitJumpTableInfo() override;
+
+  static char ID;
+
+private:
+  BTFDebug *BTF;
+};
+
+} // namespace llvm
+
+#endif /* LLVM_LIB_TARGET_BPF_BPFASMPRINTER_H */
diff --git a/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
index e67b99a813945..295c27bd16c22 100644
--- a/llvm/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
@@ -12,6 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "BPFMCInstLower.h"
+#include "BPFAsmPrinter.h"
+#include "BPFISelLowering.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -19,6 +21,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -78,7 +81,7 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
       MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
       break;
     case MachineOperand::MO_JumpTableIndex:
-      MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
+      MCOp = LowerSymbolOperand(MO, Printer.getJTPublicSymbol(MO.getIndex()));
       break;
     case MachineOperand::MO_BlockAddress:
       MCOp = LowerSymbolOperand(
diff --git a/llvm/lib/Target/BPF/BPFMCInstLower.h b/llvm/lib/Target/BPF/BPFMCInstLower.h
index 4bd0f1f0bf1cf..483edd9a02831 100644
--- a/llvm/lib/Target/BPF/BPFMCInstLower.h
+++ b/llvm/lib/Target/BPF/BPFMCInstLower.h
@@ -12,7 +12,7 @@
 #include "llvm/Support/Compiler.h"
 
 namespace llvm {
-class AsmPrinter;
+class BPFAsmPrinter;
 class MCContext;
 class MCInst;
 class MCOperand;
@@ -24,10 +24,10 @@ class MachineOperand;
 class LLVM_LIBRARY_VISIBILITY BPFMCInstLower {
   MCContext &Ctx;
 
-  AsmPrinter &Printer;
+  BPFAsmPrinter &Printer;
 
 public:
-  BPFMCInstLower(MCContext &ctx, AsmPrinter &printer)
+  BPFMCInstLower(MCContext &ctx, BPFAsmPrinter &printer)
       : Ctx(ctx), Printer(printer) {}
   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
 
diff --git a/llvm/lib/Target/BPF/BPFTargetLoweringObjectFile.cpp b/llvm/lib/Target/BPF/BPFTargetLoweringObjectFile.cpp
new file mode 100644
index 0000000000000..997f09870bad6
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFTargetLoweringObjectFile.cpp
@@ -0,0 +1,19 @@
+//===------------------ BPFTargetLoweringObjectFile.cpp -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPFTargetLoweringObjectFile.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+
+using namespace llvm;
+
+MCSection *BPFTargetLoweringObjectFileELF::getSectionForJumpTable(
+    const Function &F, const TargetMachine &TM,
+    const MachineJumpTableEntry *JTE) const {
+  return getContext().getELFSection(".jumptables", ELF::SHT_PROGBITS, 0);
+}
diff --git a/llvm/lib/Target/BPF/BPFTargetLoweringObjectFile.h b/llvm/lib/Target/BPF/BPFTargetLoweringObjectFile.h
new file mode 100644
index 0000000000000..f3064c0c8cb8a
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFTargetLoweringObjectFile.h
@@ -0,0 +1,25 @@
+//===-- BPFTargetLoweringObjectFile.h - BPF Object File Lowering -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETLOWERINGOBJECTFILE
+#define LLVM_LIB_TARGET_BPF_BPFTARGETLOWERINGOBJECTFILE
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+class BPFTargetLoweringObjectFileELF : public TargetLoweringObjectFileELF {
+
+public:
+  virtual MCSection *
+  getSectionForJumpTable(const Function &F, const TargetMachine &TM,
+                         const MachineJumpTableEntry *JTE) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_BPF_BPFTARGETLOWERINGOBJECTFILE
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 527a480354571..d538b6fe11675 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -12,6 +12,7 @@
 
 #include "BPFTargetMachine.h"
 #include "BPF.h"
+#include "BPFTargetLoweringObjectFile.h"
 #include "BPFTargetTransformInfo.h"
 #include "MCTargetDesc/BPFMCAsmInfo.h"
 #include "TargetInfo/BPFTargetInfo.h"
@@ -80,7 +81,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
     : CodeGenTargetMachineImpl(T, computeDataLayout(TT), TT, CPU, FS, Options,
                                getEffectiveRelocModel(RM),
                                getEffectiveCodeModel(CM, CodeModel::Small), OL),
-      TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
+      TLOF(std::make_unique<BPFTargetLoweringObjectFileELF>()),
       Subtarget(TT, std::string(CPU), std::string(FS), *this) {
   if (!DisableCheckUnreachable) {
     this->Options.TrapUnreachable = true;
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index eade4cacb7100..3678f1335ca36 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_target(BPFCodeGen
   BPFRegisterInfo.cpp
   BPFSelectionDAGInfo.cpp
   BPFSubtarget.cpp
+  BPFTargetLoweringObjectFile.cpp
   BPFTargetMachine.cpp
   BPFMIPeephole.cpp
   BPFMIChecking.cpp

>From c2022dd1ae8d762993754230b63f8c7b25a96f3d Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Sat, 26 Jul 2025 20:31:05 -0700
Subject: [PATCH 4/4] [BPF] Ensure GotoX only supported at CPU V4

This is temporary and it makes easy to run bpf selftests.
Once kernel side is ready, we will implement CPU V5 which
will support jump tables.
---
 llvm/lib/Target/BPF/BPFISelLowering.cpp | 6 +++++-
 llvm/lib/Target/BPF/BPFInstrInfo.td     | 7 +++++--
 llvm/lib/Target/BPF/BPFSubtarget.cpp    | 4 ++++
 llvm/lib/Target/BPF/BPFSubtarget.h      | 3 ++-
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 8fa2ac808316f..f580e0746868f 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -74,10 +74,14 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 
+  if (!STI.hasGotox())
+    setOperationAction(ISD::BRIND, MVT::Other, Expand);
+
   setOperationAction(ISD::TRAP, MVT::Other, Custom);
 
   setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);
-  setOperationAction({ISD::JumpTable, ISD::BlockAddress}, MVT::i64, Custom);
+  if (STI.hasGotox())
+    setOperationAction({ISD::JumpTable, ISD::BlockAddress}, MVT::i64, Custom);
 
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index f7e6411883ccf..9d3ac7ffa6ffb 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -61,6 +61,7 @@ def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
 def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
 def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;
 def BPFHasLoadAcqStoreRel : Predicate<"Subtarget->hasLoadAcqStoreRel()">;
+def BPFHasGotox : Predicate<"Subtarget->hasGotox()">;
 
 class ImmediateAsmOperand<string name> : AsmOperandClass {
   let Name = name;
@@ -294,8 +295,10 @@ defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
 defm JSET : J<BPF_JSET, "&", NoCond, NoCond>;
 def JCOND : JMP_JCOND<BPF_JCOND, "may_goto", []>;
 
-let isIndirectBranch = 1, isBarrier = 1 in {
-  def JX : JMP_IND<BPF_JA, "gotox", [(brind i64:$dst)]>;
+let Predicates = [BPFHasGotox] in {
+  let isIndirectBranch = 1, isBarrier = 1 in {
+    def JX : JMP_IND<BPF_JA, "gotox", [(brind i64:$dst)]>;
+  }
 }
 }
 
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 4167547680b12..a11aa6933147c 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -43,6 +43,8 @@ static cl::opt<bool>
 static cl::opt<bool> Disable_load_acq_store_rel(
     "disable-load-acq-store-rel", cl::Hidden, cl::init(false),
     cl::desc("Disable load-acquire and store-release insns"));
+static cl::opt<bool> Disable_gotox("disable-gotox", cl::Hidden, cl::init(false),
+                                   cl::desc("Disable gotox insn"));
 
 void BPFSubtarget::anchor() {}
 
@@ -66,6 +68,7 @@ void BPFSubtarget::initializeEnvironment() {
   HasGotol = false;
   HasStoreImm = false;
   HasLoadAcqStoreRel = false;
+  HasGotox = false;
 }
 
 void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -96,6 +99,7 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
     HasGotol = !Disable_gotol;
     HasStoreImm = !Disable_StoreImm;
     HasLoadAcqStoreRel = !Disable_load_acq_store_rel;
+    HasGotox = !Disable_gotox;
     return;
   }
 }
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index aed2211265e23..e870dfdc85ec9 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -65,7 +65,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
 
   // whether cpu v4 insns are enabled.
   bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm,
-      HasLoadAcqStoreRel;
+      HasLoadAcqStoreRel, HasGotox;
 
   std::unique_ptr<CallLowering> CallLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
@@ -94,6 +94,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   bool hasGotol() const { return HasGotol; }
   bool hasStoreImm() const { return HasStoreImm; }
   bool hasLoadAcqStoreRel() const { return HasLoadAcqStoreRel; }
+  bool hasGotox() const { return HasGotox; }
 
   bool isLittleEndian() const { return IsLittleEndian; }
 



More information about the llvm-commits mailing list