[Lldb-commits] [lldb] 0081149 - [lldb/DWARF] Fix PC value for artificial tail call frames for the "GNU" case

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Mon Jun 8 05:44:45 PDT 2020


Author: Pavel Labath
Date: 2020-06-08T14:44:36+02:00
New Revision: 0081149f96bd62f1d1d21040ead30bfded86e8e5

URL: https://github.com/llvm/llvm-project/commit/0081149f96bd62f1d1d21040ead30bfded86e8e5
DIFF: https://github.com/llvm/llvm-project/commit/0081149f96bd62f1d1d21040ead30bfded86e8e5.diff

LOG: [lldb/DWARF] Fix PC value for artificial tail call frames for the "GNU" case

Summary:
The way that the support for the GNU dialect of tail call frames was
implemented in D80519 meant that the were reporting very bogus PC values
which pointed into the middle of an instruction: the -1 trick is
necessary for the address to resolve to the right function, but we
should still be reporting a more realistic PC value -- I say "realistic"
and not "real", because it's very debatable what should be the correct
PC value for frames like this.

This patch achieves that my moving the -1 from SymbolFileDWARF into the
stack frame computation code. The idea is that SymbolFileDWARF will
merely report whether it has provided an address of the instruction
after the tail call, or the address of the call instruction itself. The
StackFrameList machinery uses this information to set the "behaves like
frame zero" property of the artificial frames (the main thing this flag
does is it controls the -1 subtraction when looking up the function
address).

This required a moderate refactor of the CallEdge class, because it was
implicitly assuming that edges pointing after the call were real calls
and those pointing the the call insn were tail calls. The class now
carries this information explicitly -- it carries three mostly
independent pieces of information:
- an address of interest in the caller
- a bit saying whether this address points to the call insn or after it
- whether this is a tail call

Reviewers: vsk, dblaikie

Subscribers: aprantl, mgrang, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D81010

Added: 
    

Modified: 
    lldb/include/lldb/Symbol/Function.h
    lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
    lldb/source/Symbol/Function.cpp
    lldb/source/Target/StackFrameList.cpp
    lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h
index 156e61510820..e548b8d626a3 100644
--- a/lldb/include/lldb/Symbol/Function.h
+++ b/lldb/include/lldb/Symbol/Function.h
@@ -266,6 +266,7 @@ using CallSiteParameterArray = llvm::SmallVector<CallSiteParameter, 0>;
 /// in the call graph between two functions, or to evaluate DW_OP_entry_value.
 class CallEdge {
 public:
+  enum class AddrType : uint8_t { Call, AfterCall };
   virtual ~CallEdge() {}
 
   /// Get the callee's definition.
@@ -281,21 +282,32 @@ class CallEdge {
   /// made the call.
   lldb::addr_t GetReturnPCAddress(Function &caller, Target &target) const;
 
-  /// Like \ref GetReturnPCAddress, but returns an unresolved file address.
-  lldb::addr_t GetUnresolvedReturnPCAddress() const { return return_pc; }
+  /// Return an address in the caller. This can either be the address of the
+  /// call instruction, or the address of the instruction after the call.
+  std::pair<AddrType, lldb::addr_t> GetCallerAddress(Function &caller,
+                                                     Target &target) const {
+    return {caller_address_type,
+            GetLoadAddress(caller_address, caller, target)};
+  }
 
-  /// Get the load PC address of the call instruction (or LLDB_INVALID_ADDRESS).
-  lldb::addr_t GetCallInstPC(Function &caller, Target &target) const;
+  bool IsTailCall() const { return is_tail_call; }
 
   /// Get the call site parameters available at this call edge.
   llvm::ArrayRef<CallSiteParameter> GetCallSiteParameters() const {
     return parameters;
   }
 
+  /// Non-tail-calls go first, sorted by the return address. They are followed
+  /// by tail calls, which have no specific order.
+  std::pair<bool, lldb::addr_t> GetSortKey() const {
+    return {is_tail_call, GetUnresolvedReturnPCAddress()};
+  }
+
 protected:
-  CallEdge(lldb::addr_t return_pc, lldb::addr_t call_inst_pc,
-           CallSiteParameterArray &&parameters)
-      : return_pc(return_pc), call_inst_pc(call_inst_pc),
+  CallEdge(AddrType caller_address_type, lldb::addr_t caller_address,
+           bool is_tail_call, CallSiteParameterArray &&parameters)
+      : caller_address(caller_address),
+        caller_address_type(caller_address_type), is_tail_call(is_tail_call),
         parameters(std::move(parameters)) {}
 
   /// Helper that finds the load address of \p unresolved_pc, a file address
@@ -303,13 +315,17 @@ class CallEdge {
   static lldb::addr_t GetLoadAddress(lldb::addr_t unresolved_pc,
                                      Function &caller, Target &target);
 
-  /// An invalid address if this is a tail call. Otherwise, the return PC for
-  /// the call. Note that this is a file address which must be resolved.
-  lldb::addr_t return_pc;
+  /// Like \ref GetReturnPCAddress, but returns an unresolved file address.
+  lldb::addr_t GetUnresolvedReturnPCAddress() const {
+    return caller_address_type == AddrType::AfterCall && !is_tail_call
+               ? caller_address
+               : LLDB_INVALID_ADDRESS;
+  }
 
-  /// The address of the call instruction. Usually an invalid address, unless
-  /// this is a tail call.
-  lldb::addr_t call_inst_pc;
+private:
+  lldb::addr_t caller_address;
+  AddrType caller_address_type;
+  bool is_tail_call;
 
   CallSiteParameterArray parameters;
 };
@@ -321,9 +337,11 @@ class DirectCallEdge : public CallEdge {
 public:
   /// Construct a call edge using a symbol name to identify the callee, and a
   /// return PC within the calling function to identify a specific call site.
-  DirectCallEdge(const char *symbol_name, lldb::addr_t return_pc,
-                 lldb::addr_t call_inst_pc, CallSiteParameterArray &&parameters)
-      : CallEdge(return_pc, call_inst_pc, std::move(parameters)) {
+  DirectCallEdge(const char *symbol_name, AddrType caller_address_type,
+                 lldb::addr_t caller_address, bool is_tail_call,
+                 CallSiteParameterArray &&parameters)
+      : CallEdge(caller_address_type, caller_address, is_tail_call,
+                 std::move(parameters)) {
     lazy_callee.symbol_name = symbol_name;
   }
 
@@ -352,10 +370,11 @@ class IndirectCallEdge : public CallEdge {
 public:
   /// Construct a call edge using a DWARFExpression to identify the callee, and
   /// a return PC within the calling function to identify a specific call site.
-  IndirectCallEdge(DWARFExpression call_target, lldb::addr_t return_pc,
-                   lldb::addr_t call_inst_pc,
+  IndirectCallEdge(DWARFExpression call_target, AddrType caller_address_type,
+                   lldb::addr_t caller_address, bool is_tail_call,
                    CallSiteParameterArray &&parameters)
-      : CallEdge(return_pc, call_inst_pc, std::move(parameters)),
+      : CallEdge(caller_address_type, caller_address, is_tail_call,
+                 std::move(parameters)),
         call_target(std::move(call_target)) {}
 
   Function *GetCallee(ModuleList &images, ExecutionContext &exe_ctx) override;

diff  --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 71a01947da26..b6ae9e200f22 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -3733,26 +3733,30 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
             child.GetCU());
       }
     }
-    if (child.Tag() == DW_TAG_GNU_call_site) {
-      // In DWARF v4 DW_AT_low_pc is always the address of the instruction after
-      // the call. For tail calls, approximate the address of the call
-      // instruction by subtracting 1.
-      if (tail_call)
-        call_inst_pc = low_pc - 1;
-      else
-        return_pc = low_pc;
-    }
-
     if (!call_origin && !call_target) {
       LLDB_LOG(log, "CollectCallEdges: call site without any call target");
       continue;
     }
 
+    addr_t caller_address;
+    CallEdge::AddrType caller_address_type;
+    if (return_pc != LLDB_INVALID_ADDRESS) {
+      caller_address = return_pc;
+      caller_address_type = CallEdge::AddrType::AfterCall;
+    } else if (low_pc != LLDB_INVALID_ADDRESS) {
+      caller_address = low_pc;
+      caller_address_type = CallEdge::AddrType::AfterCall;
+    } else if (call_inst_pc != LLDB_INVALID_ADDRESS) {
+      caller_address = call_inst_pc;
+      caller_address_type = CallEdge::AddrType::Call;
+    } else {
+      LLDB_LOG(log, "CollectCallEdges: No caller address");
+      continue;
+    }
     // Adjust any PC forms. It needs to be fixed up if the main executable
     // contains a debug map (i.e. pointers to object files), because we need a
     // file address relative to the executable's text section.
-    return_pc = FixupAddress(return_pc);
-    call_inst_pc = FixupAddress(call_inst_pc);
+    caller_address = FixupAddress(caller_address);
 
     // Extract call site parameters.
     CallSiteParameterArray parameters =
@@ -3764,9 +3768,9 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
                "CollectCallEdges: Found call origin: {0} (retn-PC: {1:x}) "
                "(call-PC: {2:x})",
                call_origin->GetPubname(), return_pc, call_inst_pc);
-      edge = std::make_unique<DirectCallEdge>(call_origin->GetMangledName(),
-                                              return_pc, call_inst_pc,
-                                              std::move(parameters));
+      edge = std::make_unique<DirectCallEdge>(
+          call_origin->GetMangledName(), caller_address_type, caller_address,
+          tail_call, std::move(parameters));
     } else {
       if (log) {
         StreamString call_target_desc;
@@ -3776,7 +3780,8 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
                  call_target_desc.GetString());
       }
       edge = std::make_unique<IndirectCallEdge>(
-          *call_target, return_pc, call_inst_pc, std::move(parameters));
+          *call_target, caller_address_type, caller_address, tail_call,
+          std::move(parameters));
     }
 
     if (log && parameters.size()) {

diff  --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp
index 953c77632e01..533fdd465c42 100644
--- a/lldb/source/Symbol/Function.cpp
+++ b/lldb/source/Symbol/Function.cpp
@@ -145,11 +145,7 @@ lldb::addr_t CallEdge::GetLoadAddress(lldb::addr_t unresolved_pc,
 
 lldb::addr_t CallEdge::GetReturnPCAddress(Function &caller,
                                           Target &target) const {
-  return GetLoadAddress(return_pc, caller, target);
-}
-
-lldb::addr_t CallEdge::GetCallInstPC(Function &caller, Target &target) const {
-  return GetLoadAddress(call_inst_pc, caller, target);
+  return GetLoadAddress(GetUnresolvedReturnPCAddress(), caller, target);
 }
 
 void DirectCallEdge::ParseSymbolFileAndResolve(ModuleList &images) {
@@ -313,36 +309,34 @@ llvm::ArrayRef<std::unique_ptr<CallEdge>> Function::GetCallEdges() {
   m_call_edges = sym_file->ParseCallEdgesInFunction(GetID());
 
   // Sort the call edges to speed up return_pc lookups.
-  llvm::sort(m_call_edges.begin(), m_call_edges.end(),
-             [](const std::unique_ptr<CallEdge> &LHS,
-                const std::unique_ptr<CallEdge> &RHS) {
-               return LHS->GetUnresolvedReturnPCAddress() <
-                      RHS->GetUnresolvedReturnPCAddress();
-             });
+  llvm::sort(m_call_edges, [](const std::unique_ptr<CallEdge> &LHS,
+                              const std::unique_ptr<CallEdge> &RHS) {
+    return LHS->GetSortKey() < RHS->GetSortKey();
+  });
 
   return m_call_edges;
 }
 
 llvm::ArrayRef<std::unique_ptr<CallEdge>> Function::GetTailCallingEdges() {
-  // Call edges are sorted by return PC, and tail calling edges have invalid
-  // return PCs. Find them at the end of the list.
-  return GetCallEdges().drop_until([](const std::unique_ptr<CallEdge> &edge) {
-    return edge->GetUnresolvedReturnPCAddress() == LLDB_INVALID_ADDRESS;
-  });
+  // Tail calling edges are sorted at the end of the list. Find them by dropping
+  // all non-tail-calls.
+  return GetCallEdges().drop_until(
+      [](const std::unique_ptr<CallEdge> &edge) { return edge->IsTailCall(); });
 }
 
 CallEdge *Function::GetCallEdgeForReturnAddress(addr_t return_pc,
                                                 Target &target) {
   auto edges = GetCallEdges();
   auto edge_it =
-      std::lower_bound(edges.begin(), edges.end(), return_pc,
-                       [&](const std::unique_ptr<CallEdge> &edge, addr_t pc) {
-                         return edge->GetReturnPCAddress(*this, target) < pc;
-                       });
+      llvm::partition_point(edges, [&](const std::unique_ptr<CallEdge> &edge) {
+        return std::make_pair(edge->IsTailCall(),
+                              edge->GetReturnPCAddress(*this, target)) <
+               std::make_pair(false, return_pc);
+      });
   if (edge_it == edges.end() ||
       edge_it->get()->GetReturnPCAddress(*this, target) != return_pc)
     return nullptr;
-  return &const_cast<CallEdge &>(*edge_it->get());
+  return edge_it->get();
 }
 
 Block &Function::GetBlock(bool can_create) {

diff  --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp
index 1a75986a80cf..8549ac079b02 100644
--- a/lldb/source/Target/StackFrameList.cpp
+++ b/lldb/source/Target/StackFrameList.cpp
@@ -239,7 +239,12 @@ void StackFrameList::GetOnlyConcreteFramesUpTo(uint32_t end_idx,
 /// A sequence of calls that comprise some portion of a backtrace. Each frame
 /// is represented as a pair of a callee (Function *) and an address within the
 /// callee.
-using CallSequence = std::vector<std::pair<Function *, addr_t>>;
+struct CallDescriptor {
+  Function *func;
+  CallEdge::AddrType address_type;
+  addr_t address;
+};
+using CallSequence = std::vector<CallDescriptor>;
 
 /// Find the unique path through the call graph from \p begin (with return PC
 /// \p return_pc) to \p end. On success this path is stored into \p path, and 
@@ -319,14 +324,14 @@ static void FindInterveningFrames(Function &begin, Function &end,
       }
 
       // Search the calls made from this callee.
-      active_path.emplace_back(&callee, LLDB_INVALID_ADDRESS);
+      active_path.push_back(CallDescriptor{&callee});
       for (const auto &edge : callee.GetTailCallingEdges()) {
         Function *next_callee = edge->GetCallee(images, context);
         if (!next_callee)
           continue;
 
-        addr_t tail_call_pc = edge->GetCallInstPC(callee, target);
-        active_path.back().second = tail_call_pc;
+        std::tie(active_path.back().address_type, active_path.back().address) =
+            edge->GetCallerAddress(callee, target);
 
         dfs(*edge, *next_callee);
         if (ambiguous)
@@ -400,16 +405,16 @@ void StackFrameList::SynthesizeTailCallFrames(StackFrame &next_frame) {
 
   // Push synthetic tail call frames.
   for (auto calleeInfo : llvm::reverse(path)) {
-    Function *callee = calleeInfo.first;
+    Function *callee = calleeInfo.func;
     uint32_t frame_idx = m_frames.size();
     uint32_t concrete_frame_idx = next_frame.GetConcreteFrameIndex();
     addr_t cfa = LLDB_INVALID_ADDRESS;
     bool cfa_is_valid = false;
-    addr_t pc = calleeInfo.second;
-    // We do not want to subtract 1 from this PC, as it's the actual address
-    // of the tail-calling branch instruction. This address is provided by the
-    // compiler via DW_AT_call_pc.
-    constexpr bool behaves_like_zeroth_frame = true;
+    addr_t pc = calleeInfo.address;
+    // If the callee address refers to the call instruction, we do not want to
+    // subtract 1 from this value.
+    const bool behaves_like_zeroth_frame =
+        calleeInfo.address_type == CallEdge::AddrType::Call;
     SymbolContext sc;
     callee->CalculateSymbolContext(&sc);
     auto synth_frame = std::make_shared<StackFrame>(

diff  --git a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp
index 4b92a34709e0..2d6241a3fe3b 100644
--- a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp
+++ b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp
@@ -3,12 +3,22 @@ volatile int x;
 void __attribute__((noinline)) sink() {
   x++; //% self.filecheck("bt", "main.cpp", "-implicit-check-not=artificial")
   // CHECK: frame #0: 0x{{[0-9a-f]+}} a.out`sink() at main.cpp:[[@LINE-1]]:4
-  // CHECK-NEXT: frame #1: 0x{{[0-9a-f]+}} a.out`func3() at main.cpp:16:3
+  // CHECK-NEXT: frame #1: 0x{{[0-9a-f]+}} a.out`func3() at main.cpp:26:3
   // CHECK-SAME: [artificial]
   // CHECK-NEXT: frame #2: 0x{{[0-9a-f]+}} a.out`func2()
-  // CHECK-NEXT: frame #3: 0x{{[0-9a-f]+}} a.out`func1() at main.cpp:25:3
+  // CHECK-NEXT: frame #3: 0x{{[0-9a-f]+}} a.out`func1() at main.cpp:35:3
   // CHECK-SAME: [artificial]
   // CHECK-NEXT: frame #4: 0x{{[0-9a-f]+}} a.out`main
+  // In the GNU style, the artificial frames will point after the tail call
+  // instruction. In v5 they should point to the instruction itself.
+  //% frame1 = self.thread().GetFrameAtIndex(1)
+  //% func3 = frame1.GetFunction()
+  //% func3_insns = func3.GetInstructions(self.target())
+  //% self.trace("func3:\n%s"%func3_insns)
+  //% last_insn = func3_insns.GetInstructionAtIndex(func3_insns.GetSize()-1)
+  //% addr = last_insn.GetAddress()
+  //% if "GNU" in self.name: addr.OffsetAddress(last_insn.GetByteSize())
+  //% self.assertEquals(frame1.GetPCAddress(), addr)
 }
 
 void __attribute__((noinline)) func3() {


        


More information about the lldb-commits mailing list