[llvm] Jump table annotations for Linux (PR #112606)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 16 12:52:52 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-selectiondag

Author: Ard Biesheuvel (ardbiesheuvel)

<details>
<summary>Changes</summary>

This patch adds jump table annotations to ELF objects so that indirect branches can be correlated with their associated jump tables by static analysis tooling operating on [partially linked] object files.

The jump table part is straightforward:

```asm
	.section	.rodata,"a",@progbits
	.p2align	2, 0x0
.LJTI0_0:
	.long	.LBB0_1-.LJTI0_0
	.long	.LBB0_2-.LJTI0_0
	.long	.LBB0_3-.LJTI0_0
	.long	.LBB0_4-.LJTI0_0
	.long	.LBB0_5-.LJTI0_0
	.long	.LBB0_5-.LJTI0_0
.Ljt_end0:
.set $JTI0_0, .LJTI0_0
	.type	$JTI0_0,@object
	.size	$JTI0_0, .Ljt_end0-.LJTI0_0
```

The handling at the call site is a bit trickier. I ended up reusing the existing jump table debug info support, producing something like

```asm
	leaq	.LJTI0_0(%rip), %rcx
	movslq	(%rcx,%rax,4), %rax
	addq	%rcx, %rax
	.reloc .Ljtp0, BFD_RELOC_NONE, .LJTI0_0
.Ljtp0:
	jmpq	*%rax
```

which works in most cases. However, when the jump destination gets spilled to the stack, we may end up with something like

```asm
   1457c:       4a 8b 04 ed 00 00 00    mov    0x0(,%r13,8),%rax
   14583:       00 
                        14580: R_X86_64_32S     .rodata+0x88
   14584:       48 89 04 24             mov    %rax,(%rsp)

   ...
   14602:       48 8b 04 24             mov    (%rsp),%rax
                        14602: R_X86_64_NONE    .rodata+0x88
   14606:       ff e0                   jmp    *%rax
```

where the relocation is no longer attached to the `jmp` instruction but to the preceding `mov` instruction.

Any hints on how to do this properly would be much appreciated.

---
Full diff: https://github.com/llvm/llvm-project/pull/112606.diff


5 Files Affected:

- (modified) llvm/include/llvm/CodeGen/AsmPrinter.h (+4) 
- (modified) llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (+43-2) 
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+4-2) 
- (modified) llvm/lib/Target/ARM/Thumb2InstrInfo.cpp (+4-1) 
- (added) llvm/test/CodeGen/ARM/stack-guard-nomovt.ll (+32) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index c9a88d7b1c015c..fabe5bc226037d 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -453,6 +453,10 @@ class AsmPrinter : public MachineFunctionPass {
   /// function to the current output stream.
   virtual void emitJumpTableInfo();
 
+  /// Emit jump table annotations correlating each table with its associated
+  /// indirect branch instruction.
+  virtual void emitJumpTableAnnotation(const MachineFunction &MF, const MachineInstr &MI);
+
   /// Emit the specified global variable to the .s file.
   virtual void emitGlobalVariable(const GlobalVariable *GV);
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index db7adfd3b21e5f..32f1733383e331 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -162,6 +162,10 @@ static cl::opt<bool> EmitJumpTableSizesSection(
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> AnnotateJumpTables("annotate-jump-tables",
+                                        cl::desc("Annotate jump tables"),
+                                        cl::Hidden, cl::init(false));
+
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
 char AsmPrinter::ID = 0;
@@ -1528,6 +1532,25 @@ void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) {
   }
 }
 
+void AsmPrinter::emitJumpTableAnnotation(const MachineFunction &MF,
+                                         const MachineInstr &MI) {
+  if (!AnnotateJumpTables || !TM.getTargetTriple().isOSBinFormatELF())
+    return;
+
+  MCSymbol *JTISymbol = GetJTISymbol(MI.getOperand(0).getImm());
+  MCSymbol *ProvenanceLabel = OutContext.createTempSymbol("jtp");
+
+  const MCExpr *OffsetExpr =
+      MCSymbolRefExpr::create(ProvenanceLabel, OutContext);
+  const MCExpr *JTISymbolExpr =
+      MCSymbolRefExpr::create(JTISymbol, OutContext);
+
+  OutStreamer->emitRelocDirective(*OffsetExpr, "BFD_RELOC_NONE",
+                                  JTISymbolExpr, SMLoc(),
+                                  *OutContext.getSubtargetInfo());
+  OutStreamer->emitLabel(ProvenanceLabel);
+}
+
 void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
   if (!MF.getTarget().Options.EmitStackSizeSection)
     return;
@@ -1836,8 +1859,7 @@ void AsmPrinter::emitFunctionBody() {
         OutStreamer->emitRawComment("MEMBARRIER");
         break;
       case TargetOpcode::JUMP_TABLE_DEBUG_INFO:
-        // This instruction is only used to note jump table debug info, it's
-        // purely meta information.
+        emitJumpTableAnnotation(*MF, MI);
         break;
       case TargetOpcode::INIT_UNDEF:
         // This is only used to influence register allocation behavior, no
@@ -2792,6 +2814,25 @@ void AsmPrinter::emitJumpTableInfo() {
     // label differences will be evaluated at write time.
     for (const MachineBasicBlock *MBB : JTBBs)
       emitJumpTableEntry(MJTI, MBB, JTI);
+
+    if (AnnotateJumpTables && TM.getTargetTriple().isOSBinFormatELF()) {
+      // Create a temp symbol for the end of the jump table.
+      MCSymbol *JTIEndSymbol = createTempSymbol("jt_end");
+      OutStreamer->emitLabel(JTIEndSymbol);
+
+      const MCExpr *JTISymbolExpr =
+          MCSymbolRefExpr::create(JTISymbol, OutContext);
+
+      MCSymbol *JTISymbolForSize = OutContext.getOrCreateSymbol(
+          "$JTI" + Twine(MF->getFunctionNumber()) + "_" + Twine(JTI));
+      OutStreamer->emitAssignment(JTISymbolForSize, JTISymbolExpr);
+      OutStreamer->emitSymbolAttribute(JTISymbolForSize, MCSA_ELF_TypeObject);
+
+      const MCExpr *SizeExp = MCBinaryExpr::createSub(
+          MCSymbolRefExpr::create(JTIEndSymbol, OutContext), JTISymbolExpr,
+          OutContext);
+      OutStreamer->emitELFSize(JTISymbolForSize, SizeExp);
+    }
   }
 
   if (EmitJumpTableSizesSection)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 95937886280685..e1be41a3111afb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -478,8 +478,10 @@ SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                                SDValue Addr, int JTI,
                                                SelectionDAG &DAG) const {
   SDValue Chain = Value;
-  // Jump table debug info is only needed if CodeView is enabled.
-  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
+  const auto &Triple = DAG.getTarget().getTargetTriple();
+  // Jump table debug info is only needed if CodeView is enabled,
+  // or when adding jump table annotations to ELF objects.
+  if (Triple.isOSBinFormatCOFF() || Triple.isOSBinFormatELF()) {
     Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
   }
   return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index d1e07b6703a5e6..27f86389a3856a 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -264,8 +264,11 @@ void Thumb2InstrInfo::expandLoadStackGuard(
   }
 
   const auto *GV = cast<GlobalValue>((*MI->memoperands_begin())->getValue());
-  if (MF.getSubtarget<ARMSubtarget>().isTargetELF() && !GV->isDSOLocal())
+  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
+  if (Subtarget.isTargetELF() && !GV->isDSOLocal())
     expandLoadStackGuardBase(MI, ARM::t2LDRLIT_ga_pcrel, ARM::t2LDRi12);
+  else if (!Subtarget.useMovt())
+    expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::t2LDRi12);
   else if (MF.getTarget().isPositionIndependent())
     expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12);
   else
diff --git a/llvm/test/CodeGen/ARM/stack-guard-nomovt.ll b/llvm/test/CodeGen/ARM/stack-guard-nomovt.ll
new file mode 100644
index 00000000000000..6802dabfda87a6
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/stack-guard-nomovt.ll
@@ -0,0 +1,32 @@
+; RUN: llc -relocation-model=static -mattr=+no-movt < %s | FileCheck %s
+
+target triple = "thumbv7a-linux-gnueabi"
+
+define i32 @test1() #0 {
+; CHECK-LABEL: test1:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push	{r7, lr}
+; CHECK-NEXT:    sub.w	sp, sp, #1032
+; CHECK-NEXT:    ldr	r0, .LCPI0_0
+; CHECK-NEXT:    ldr	r0, [r0]
+; CHECK-NEXT:    str.w	r0, [sp, #1028]
+; CHECK-NEXT:    add	r0, sp, #4
+; CHECK-NEXT:    bl	foo
+; CHECK-NEXT:    ldr.w	r0, [sp, #1028]
+; CHECK-NEXT:    ldr	r1, .LCPI0_0
+; CHECK-NEXT:    ldr	r1, [r1]
+; CHECK-NEXT:    cmp	r1, r0
+; CHECK-NEXT:    ittt	eq
+; CHECK-NEXT:    moveq	r0, #0
+; CHECK-NEXT:    addeq.w	sp, sp, #1032
+; CHECK-NEXT:    popeq	{r7, pc}
+; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    bl __stack_chk_fail
+  %a1 = alloca [256 x i32], align 4
+  call void @foo(ptr %a1) #3
+  ret i32 0
+}
+
+declare void @foo(ptr)
+
+attributes #0 = { nounwind sspstrong }

``````````

</details>


https://github.com/llvm/llvm-project/pull/112606


More information about the llvm-commits mailing list