[llvm] [BOLT][RISCV] Handle long tail calls (PR #67098)

Job Noorman via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 22 01:36:38 PDT 2023


https://github.com/mtvec created https://github.com/llvm/llvm-project/pull/67098

Long tail calls use the following instruction sequence on RISC-V:

```
1: auipc xi, %pcrel_hi(sym)
jalr zero, %pcrel_lo(1b)(xi)
```

Since the second instruction in isolation looks like an indirect branch, this confused BOLT and most functions containing a long tail call got marked with "unknown control flow" and didn't get optimized as a consequence.

This patch fixes this by detecting long tail call sequence in `analyzeIndirectBranch`. `FixRISCVCallsPass` also had to be updated to expand long tail calls to `PseudoTAIL` instead of `PseudoCALL`.

Besides this, this patch also fixes a minor issue with compressed tail calls (`c.jr`) not being detected.

Note that I had to change `BinaryFunction::postProcessIndirectBranches` slightly: the documentation of `MCPlusBuilder::analyzeIndirectBranch` mentions that the [`Begin`, `End`) range contains the instructions immediately preceding `Instruction`. However, in
`postProcessIndirectBranches`, *all* the instructions in the BB where passed in the range. This made it difficult to find the preceding instruction so I made sure *only* the preceding instructions are passed.

>From 9a4faaee4283b6eef469a733e44fe952edfaf0ff Mon Sep 17 00:00:00 2001
From: Job Noorman <jnoorman at igalia.com>
Date: Fri, 22 Sep 2023 10:17:11 +0200
Subject: [PATCH] [BOLT][RISCV] Handle long tail calls

Long tail calls use the following instruction sequence on RISC-V:

```
1: auipc xi, %pcrel_hi(sym)
jalr zero, %pcrel_lo(1b)(xi)
```

Since the second instruction in isolation looks like an indirect branch,
this confused BOLT and most functions containing a long tail call got
marked with "unknown control flow" and didn't get optimized as a
consequence.

This patch fixes this by detecting long tail call sequence in
`analyzeIndirectBranch`. `FixRISCVCallsPass` also had to be updated to
expand long tail calls to `PseudoTAIL` instead of `PseudoCALL`.

Besides this, this patch also fixes a minor issue with compressed tail
calls (`c.jr`) not being detected.

Note that I had to change `BinaryFunction::postProcessIndirectBranches`
slightly: the documentation of `MCPlusBuilder::analyzeIndirectBranch`
mentions that the [`Begin`, `End`) range contains the instructions
immediately preceding `Instruction`. However, in
`postProcessIndirectBranches`, *all* the instructions in the BB where
passed in the range. This made it difficult to find the preceding
instruction so I made sure *only* the preceding instructions are passed.
---
 bolt/lib/Core/BinaryFunction.cpp             |  5 +--
 bolt/lib/Passes/FixRISCVCallsPass.cpp        |  7 ++++-
 bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 12 +++++++
 bolt/test/RISCV/call-annotations.s           | 33 +++++++++++++++++++-
 4 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 6cf1cfb50a9c12c..0fe417c469c4eee 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1761,7 +1761,8 @@ bool BinaryFunction::postProcessIndirectBranches(
   uint64_t LastJT = 0;
   uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
   for (BinaryBasicBlock &BB : blocks()) {
-    for (MCInst &Instr : BB) {
+    for (BinaryBasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
+      MCInst &Instr = *II;
       if (!BC.MIB->isIndirectBranch(Instr))
         continue;
 
@@ -1789,7 +1790,7 @@ bool BinaryFunction::postProcessIndirectBranches(
         const MCExpr *DispExpr;
         MCInst *PCRelBaseInstr;
         IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
-            Instr, BB.begin(), BB.end(), PtrSize, MemLocInstr, BaseRegNum,
+            Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
             IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
         if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
           continue;
diff --git a/bolt/lib/Passes/FixRISCVCallsPass.cpp b/bolt/lib/Passes/FixRISCVCallsPass.cpp
index 963a8b04cf9db29..e2984deda16dc3e 100644
--- a/bolt/lib/Passes/FixRISCVCallsPass.cpp
+++ b/bolt/lib/Passes/FixRISCVCallsPass.cpp
@@ -43,7 +43,12 @@ void FixRISCVCallsPass::runOnFunction(BinaryFunction &BF) {
 
         MCInst OldCall = *NextII;
         auto L = BC.scopeLock();
-        MIB->createCall(*II, Target, Ctx);
+
+        if (MIB->isTailCall(*NextII))
+          MIB->createTailCall(*II, Target, Ctx);
+        else
+          MIB->createCall(*II, Target, Ctx);
+
         MIB->moveAnnotations(std::move(OldCall), *II);
 
         // The original offset was set on the jalr of the auipc+jalr pair. Since
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index 0acfedabd08bd51..96c5cddfe00e2e5 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -83,6 +83,7 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
       return false;
     case RISCV::JALR:
     case RISCV::C_JALR:
+    case RISCV::C_JR:
       return true;
     }
   }
@@ -154,6 +155,17 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
     DispValue = 0;
     DispExpr = nullptr;
     PCRelBaseOut = nullptr;
+
+    // Check for the following long tail call sequence:
+    // 1: auipc xi, %pcrel_hi(sym)
+    // jalr zero, %pcrel_lo(1b)(xi)
+    if (Instruction.getOpcode() == RISCV::JALR && Begin != End) {
+      MCInst &PrevInst = *std::prev(End);
+      if (isRISCVCall(PrevInst, Instruction) &&
+          Instruction.getOperand(0).getReg() == RISCV::X0)
+        return IndirectBranchType::POSSIBLE_TAIL_CALL;
+    }
+
     return IndirectBranchType::UNKNOWN;
   }
 
diff --git a/bolt/test/RISCV/call-annotations.s b/bolt/test/RISCV/call-annotations.s
index e2c5a1040faee18..bc539bb0ec7796b 100644
--- a/bolt/test/RISCV/call-annotations.s
+++ b/bolt/test/RISCV/call-annotations.s
@@ -4,7 +4,7 @@
 // RUN: llvm-mc -triple riscv64 -filetype obj -o %t.o %s
 // RUN: ld.lld --emit-relocs -o %t %t.o
 // RUN: llvm-bolt --enable-bat --print-cfg --print-fix-riscv-calls \
-// RUN:     --print-only=_start -o /dev/null %t | FileCheck %s
+// RUN:     -o /dev/null %t | FileCheck %s
 
   .text
   .global f
@@ -19,11 +19,24 @@ f:
 // CHECK-NEXT: jal ra, f # Offset: 8
 // CHECK-NEXT: jal zero, f # TAILCALL  # Offset: 12
 
+// CHECK-LABEL: Binary Function "long_tail" after building cfg {
+// CHECK:      auipc t1, f
+// CHECK-NEXT: jalr zero, -24(t1) # TAILCALL  # Offset: 8
+
+// CHECK-LABEL: Binary Function "compressed_tail" after building cfg {
+// CHECK:      jr a0 # TAILCALL  # Offset: 0
+
 // CHECK-LABEL: Binary Function "_start" after fix-riscv-calls {
 // CHECK:      call f # Offset: 0
 // CHECK-NEXT: call f # Offset: 8
 // CHECK-NEXT: tail f # TAILCALL   # Offset: 12
 
+// CHECK-LABEL: Binary Function "long_tail" after fix-riscv-calls {
+// CHECK:      tail f # TAILCALL   # Offset: 4
+
+// CHECK-LABEL: Binary Function "compressed_tail" after fix-riscv-calls {
+// CHECK:      jr a0 # TAILCALL  # Offset: 0
+
   .globl _start
   .p2align 1
 _start:
@@ -31,3 +44,21 @@ _start:
   jal f
   jal zero, f
   .size _start, .-_start
+
+  .globl long_tail
+  .p2align 1
+long_tail:
+    // NOTE: BOLT assumes indirect calls in single-BB functions are tail calls
+    // so artificially introduce a second BB to force RISC-V-specific analysis
+    // to get triggered.
+    beq a0, a1, 1f
+1:
+    tail f
+    .size long_tail, .-long_tail
+
+   .globl compressed_tail
+   .p2align 1
+   .option rvc
+compressed_tail:
+    c.jr a0
+    .size compressed_tail, .-compressed_tail



More information about the llvm-commits mailing list