[llvm] [BOLT]Identify indirect call (PR #123305)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 30 05:41:32 PST 2025


https://github.com/liusy58 updated https://github.com/llvm/llvm-project/pull/123305

>From 241e72477e3891b062c12a210adf54e8612d5553 Mon Sep 17 00:00:00 2001
From: Nicholas <45984215+liusy58 at users.noreply.github.com>
Date: Fri, 17 Jan 2025 09:48:17 +0800
Subject: [PATCH 1/3] [BOLT]  Support identifying indirect tail call.

AArch64 instructions have a fixed size 4 bytes, no need to compute.
---
 bolt/include/bolt/Core/MCPlusBuilder.h        |  5 ++
 bolt/lib/Core/BinaryFunction.cpp              | 53 +++++++++++++++++++
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 10 ++++
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp      | 11 ++++
 bolt/test/AArch64/indirect-tail-call.s        | 32 +++++++++++
 5 files changed, 111 insertions(+)
 create mode 100644 bolt/test/AArch64/indirect-tail-call.s

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 5d77e6faff2fc6b..a38f17b2c81b09f 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -610,6 +610,11 @@ class MCPlusBuilder {
 
   virtual bool isLeave(const MCInst &Inst) const { return false; }
 
+  virtual bool hasUseOrDefofSPOrFP(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   virtual bool isADRP(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
     return false;
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 1c5cd62a095b246..f44303b52c7a0b8 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1961,6 +1961,59 @@ bool BinaryFunction::postProcessIndirectBranches(
       bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) {
         return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr);
       });
+      if (BC.isAArch64()) {
+        // Any adr instruction of aarch64 will generate a new entry,
+        // Adr instruction cannt afford to do any optimizations
+        if (!IsEpilogue && !isMultiEntry()) {
+          BinaryBasicBlock::iterator LastDefCFAOffsetInstIter = BB.end();
+          // find the last OpDefCfaOffset 0 instruction.
+          for (BinaryBasicBlock::iterator Iter = BB.begin(); Iter != BB.end();
+               ++Iter) {
+            if (&*Iter == &Instr) {
+              break;
+            }
+            if (BC.MIB->isCFI(*Iter)) {
+              const MCCFIInstruction *CFIInst =
+                  BB.getParent()->getCFIFor(*Iter);
+              if ((CFIInst->getOperation() ==
+                   MCCFIInstruction::OpDefCfaOffset) &&
+                  (CFIInst->getOffset() == 0)) {
+                LastDefCFAOffsetInstIter = Iter;
+                break;
+              }
+            }
+          }
+          if (LastDefCFAOffsetInstIter != BB.end()) {
+            IsEpilogue = true;
+            // make sure there is no instruction manipulating sp between the two
+            // instructions
+            BinaryBasicBlock::iterator Iter = LastDefCFAOffsetInstIter;
+            while (&*Iter != &Instr) {
+              if (BC.MIB->hasUseOrDefofSPOrFP(*Iter)) {
+                IsEpilogue = false;
+                break;
+              }
+              ++Iter;
+            }
+          }
+        }
+
+        if (!IsEpilogue) {
+          IsEpilogue = true;
+          BinaryFunction *Func = BB.getFunction();
+          for (const BinaryBasicBlock &BinaryBB : *Func) {
+            for (const MCInst &Inst : BinaryBB) {
+              if (BC.MIB->hasUseOrDefofSPOrFP(Inst)) {
+                IsEpilogue = false;
+                break;
+              }
+            }
+            if (!IsEpilogue) {
+              break;
+            }
+          }
+        }
+      }
       if (IsEpilogue) {
         BC.MIB->convertJmpToTailCall(Instr);
         BB.removeAllSuccessors();
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index d752751c17932a7..e6e79cd33605fc9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1797,6 +1797,16 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   getInstructionSize(const MCInst &Inst) const override {
     return 4;
   }
+
+  bool hasUseOrDefofSPOrFP(const MCInst &Inst) const override {
+    if (isPseudo(Inst) || isNoop(Inst) || isCFI(Inst)) {
+      return false;
+    }
+    return hasDefOfPhysReg(Inst, AArch64::SP) ||
+           hasUseOfPhysReg(Inst, AArch64::SP) ||
+           hasDefOfPhysReg(Inst, AArch64::FP) ||
+           hasUseOfPhysReg(Inst, AArch64::FP);
+  }
 };
 
 } // end anonymous namespace
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 63086c06d74fd98..ad998d8601e26f2 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -89,6 +89,17 @@ class X86MCPlusBuilder : public MCPlusBuilder {
 public:
   using MCPlusBuilder::MCPlusBuilder;
 
+  virtual bool hasUseOrDefofSPOrFP(const MCInst &Inst) const override {
+    bool IsLoad, IsStore, IsStoreFromReg, IsSimple, IsIndexed;
+    MCPhysReg Reg;
+    int32_t SrcImm;
+    uint16_t StackPtrReg;
+    int64_t StackOffset;
+    uint8_t Size;
+    return isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, SrcImm,
+                         StackPtrReg, StackOffset, Size, IsSimple, IsIndexed);
+  }
+
   std::unique_ptr<MCSymbolizer>
   createTargetSymbolizer(BinaryFunction &Function,
                          bool CreateNewSymbols) const override {
diff --git a/bolt/test/AArch64/indirect-tail-call.s b/bolt/test/AArch64/indirect-tail-call.s
new file mode 100644
index 000000000000000..5eb600132dce9a2
--- /dev/null
+++ b/bolt/test/AArch64/indirect-tail-call.s
@@ -0,0 +1,32 @@
+## This test checks that indirect tail call is properly identified by BOLT on aarch64.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --print-all --print-only=indirect  \
+# RUN: %t.exe -o %t.bolt | FileCheck %s
+
+#CHECK: Binary Function "indirect" after building cfg {
+#CHECK-NOT: # UNKNOWN CONTROL FLOW
+#CHECK: End of Function "indirect"
+	.text
+	.globl	indirect                       
+	.type	indirect, at function
+indirect:      
+    cbz	x0, .LBB0_2                         
+	ldr	x8, [x0]
+	ldr	x1, [x8]
+	br	x1
+.LBB0_2:
+	mov	w0, #3
+	ret
+	.size	indirect, .-indirect
+
+
+	.globl	main                            
+	.type	main, at function
+main:                                   
+	mov	w0, wzr
+	ret
+	.size	main, .-main

>From 1c4982813f21eddede6e4070c780a6e532bcf112 Mon Sep 17 00:00:00 2001
From: liusy58 <liusy58 at linux.alibaba.com>
Date: Fri, 17 Jan 2025 16:59:57 +0800
Subject: [PATCH 2/3] [BOLT] Support identifying indirect tail calls.

---
 bolt/test/AArch64/indirect-tail-call.s | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bolt/test/AArch64/indirect-tail-call.s b/bolt/test/AArch64/indirect-tail-call.s
index 5eb600132dce9a2..1c820b1ad94bbc0 100644
--- a/bolt/test/AArch64/indirect-tail-call.s
+++ b/bolt/test/AArch64/indirect-tail-call.s
@@ -1,5 +1,4 @@
 ## This test checks that indirect tail call is properly identified by BOLT on aarch64.
-
 # REQUIRES: system-linux
 
 # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o

>From b5f4fbb926c3acc0080a219c5e1e2982d636461f Mon Sep 17 00:00:00 2001
From: liusy58 <liusy58 at linux.alibaba.com>
Date: Mon, 20 Jan 2025 17:14:54 +0800
Subject: [PATCH 3/3] [BOLT] Add another test to identify indirect tail call by
 using CFI

---
 bolt/lib/Core/BinaryFunction.cpp        | 53 +++++++++----------------
 bolt/test/AArch64/indirect-tail-call2.s | 47 ++++++++++++++++++++++
 2 files changed, 66 insertions(+), 34 deletions(-)
 create mode 100644 bolt/test/AArch64/indirect-tail-call2.s

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index f44303b52c7a0b8..a22e92644a0f8f6 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1962,16 +1962,19 @@ bool BinaryFunction::postProcessIndirectBranches(
         return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr);
       });
       if (BC.isAArch64()) {
-        // Any adr instruction of aarch64 will generate a new entry,
-        // Adr instruction cannt afford to do any optimizations
+        // Any ADR instruction of AArch64 will generate a new entry,
+        // ADR instruction cannot afford to do any optimizations. Because ADR
+        // computes a PC-relative address within a limited range tied to the
+        // current program counter, optimizing transformations (like code
+        // rearrangements) can change address distances and potentially exceed
+        // ADR’s range.
         if (!IsEpilogue && !isMultiEntry()) {
           BinaryBasicBlock::iterator LastDefCFAOffsetInstIter = BB.end();
-          // find the last OpDefCfaOffset 0 instruction.
+          // Find the last OpDefCfaOffset 0 instruction.
           for (BinaryBasicBlock::iterator Iter = BB.begin(); Iter != BB.end();
                ++Iter) {
-            if (&*Iter == &Instr) {
+            if (&*Iter == &Instr)
               break;
-            }
             if (BC.MIB->isCFI(*Iter)) {
               const MCCFIInstruction *CFIInst =
                   BB.getParent()->getCFIFor(*Iter);
@@ -1979,39 +1982,21 @@ bool BinaryFunction::postProcessIndirectBranches(
                    MCCFIInstruction::OpDefCfaOffset) &&
                   (CFIInst->getOffset() == 0)) {
                 LastDefCFAOffsetInstIter = Iter;
+                IsEpilogue = true;
+                // Make sure there is no instruction manipulating sp between the
+                // two instructions.
+                BinaryBasicBlock::iterator Iter = LastDefCFAOffsetInstIter;
+                while (&*Iter != &Instr) {
+                  if (BC.MIB->hasUseOrDefofSPOrFP(*Iter)) {
+                    IsEpilogue = false;
+                    break;
+                  }
+                  ++Iter;
+                }
                 break;
               }
             }
           }
-          if (LastDefCFAOffsetInstIter != BB.end()) {
-            IsEpilogue = true;
-            // make sure there is no instruction manipulating sp between the two
-            // instructions
-            BinaryBasicBlock::iterator Iter = LastDefCFAOffsetInstIter;
-            while (&*Iter != &Instr) {
-              if (BC.MIB->hasUseOrDefofSPOrFP(*Iter)) {
-                IsEpilogue = false;
-                break;
-              }
-              ++Iter;
-            }
-          }
-        }
-
-        if (!IsEpilogue) {
-          IsEpilogue = true;
-          BinaryFunction *Func = BB.getFunction();
-          for (const BinaryBasicBlock &BinaryBB : *Func) {
-            for (const MCInst &Inst : BinaryBB) {
-              if (BC.MIB->hasUseOrDefofSPOrFP(Inst)) {
-                IsEpilogue = false;
-                break;
-              }
-            }
-            if (!IsEpilogue) {
-              break;
-            }
-          }
         }
       }
       if (IsEpilogue) {
diff --git a/bolt/test/AArch64/indirect-tail-call2.s b/bolt/test/AArch64/indirect-tail-call2.s
new file mode 100644
index 000000000000000..6c1cb338075cab8
--- /dev/null
+++ b/bolt/test/AArch64/indirect-tail-call2.s
@@ -0,0 +1,47 @@
+## This test checks that indirect tail call is properly identified by BOLT on aarch64
+## when that OpDefCfaOffset=0 is in place without any stack changes between the 
+## CFI instruction and the indirect branch.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --print-all --print-only=indirect  \
+# RUN: %t.exe -o %t.bolt | FileCheck %s
+
+#CHECK: Binary Function "indirect" after building cfg {
+#CHECK-NOT: # UNKNOWN CONTROL FLOW
+#CHECK: End of Function "indirect"
+	.text
+	.global	indirect
+	.type	indirect, %function
+indirect:
+.LFB0:
+	.cfi_startproc
+	stp	x29, x30, [sp, -16]!
+	.cfi_def_cfa_offset 16
+	.cfi_offset 29, -16
+	.cfi_offset 30, -8
+	mov	w3, w1
+	add	w1, w0, w1
+	add	w5, w1, w1, lsr 31
+	tst	x1, 1
+	asr	w5, w5, 1
+	csel	w1, w1, w5, eq
+	cmp	w0, w3
+	beq	.L3
+	mov	w0, w3
+	mov	x16, x2
+	ldp	x29, x30, [sp], 16
+	.cfi_restore 30
+	.cfi_restore 29
+	.cfi_def_cfa_offset 0
+	mov w3, w1
+	add w1, w0, w1
+	br	x16
+.L3:
+	ret
+	.cfi_endproc
+.LFE0:
+	.size	indirect, .-indirect
+



More information about the llvm-commits mailing list