[Lldb-commits] [lldb] 03e29e2 - [lldb/DWARF] Reland: Use DW_AT_call_pc to determine artificial frame address

Vedant Kumar via lldb-commits lldb-commits at lists.llvm.org
Tue Mar 24 12:54:49 PDT 2020


Author: Vedant Kumar
Date: 2020-03-24T12:54:40-07:00
New Revision: 03e29e2c19a8e1f6a225b1878df3eed4e54891e5

URL: https://github.com/llvm/llvm-project/commit/03e29e2c19a8e1f6a225b1878df3eed4e54891e5
DIFF: https://github.com/llvm/llvm-project/commit/03e29e2c19a8e1f6a225b1878df3eed4e54891e5.diff

LOG: [lldb/DWARF] Reland: Use DW_AT_call_pc to determine artificial frame address

Reland with changes: the test modified in this change originally failed
on a Debian/x86_64 builder, and I suspect the cause was that lldb looked
up the line location for an artificial frame by subtracting 1 from the
frame's address. For artificial frames, the subtraction must not happen
because the address is already exact.

---

lldb currently guesses the address to use when creating an artificial
frame (i.e., a frame constructed by determining the sequence of (tail)
calls which must have happened).

Guessing the address creates problems -- use the actual address provided
by the DW_AT_call_pc attribute instead.

Depends on D76336.

rdar://60307600

Differential Revision: https://reviews.llvm.org/D76337

Added: 
    

Modified: 
    lldb/include/lldb/Symbol/Function.h
    lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
    lldb/source/Symbol/Function.cpp
    lldb/source/Target/StackFrameList.cpp
    lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h
index 0db9a5116d25..40d316fa78eb 100644
--- a/lldb/include/lldb/Symbol/Function.h
+++ b/lldb/include/lldb/Symbol/Function.h
@@ -284,19 +284,33 @@ class CallEdge {
   /// Like \ref GetReturnPCAddress, but returns an unresolved file address.
   lldb::addr_t GetUnresolvedReturnPCAddress() const { return return_pc; }
 
+  /// Get the load PC address of the call instruction (or LLDB_INVALID_ADDRESS).
+  lldb::addr_t GetCallInstPC(Function &caller, Target &target) const;
+
   /// Get the call site parameters available at this call edge.
   llvm::ArrayRef<CallSiteParameter> GetCallSiteParameters() const {
     return parameters;
   }
 
 protected:
-  CallEdge(lldb::addr_t return_pc, CallSiteParameterArray &&parameters)
-      : return_pc(return_pc), parameters(std::move(parameters)) {}
+  CallEdge(lldb::addr_t return_pc, lldb::addr_t call_inst_pc,
+           CallSiteParameterArray &&parameters)
+      : return_pc(return_pc), call_inst_pc(call_inst_pc),
+        parameters(std::move(parameters)) {}
+
+  /// Helper that finds the load address of \p unresolved_pc, a file address
+  /// which refers to an instruction within \p caller.
+  static lldb::addr_t GetLoadAddress(lldb::addr_t unresolved_pc,
+                                     Function &caller, Target &target);
 
   /// An invalid address if this is a tail call. Otherwise, the return PC for
   /// the call. Note that this is a file address which must be resolved.
   lldb::addr_t return_pc;
 
+  /// The address of the call instruction. Usually an invalid address, unless
+  /// this is a tail call.
+  lldb::addr_t call_inst_pc;
+
   CallSiteParameterArray parameters;
 };
 
@@ -308,8 +322,8 @@ class DirectCallEdge : public CallEdge {
   /// Construct a call edge using a symbol name to identify the callee, and a
   /// return PC within the calling function to identify a specific call site.
   DirectCallEdge(const char *symbol_name, lldb::addr_t return_pc,
-                 CallSiteParameterArray &&parameters)
-      : CallEdge(return_pc, std::move(parameters)) {
+                 lldb::addr_t call_inst_pc, CallSiteParameterArray &&parameters)
+      : CallEdge(return_pc, call_inst_pc, std::move(parameters)) {
     lazy_callee.symbol_name = symbol_name;
   }
 
@@ -339,8 +353,9 @@ class IndirectCallEdge : public CallEdge {
   /// Construct a call edge using a DWARFExpression to identify the callee, and
   /// a return PC within the calling function to identify a specific call site.
   IndirectCallEdge(DWARFExpression call_target, lldb::addr_t return_pc,
+                   lldb::addr_t call_inst_pc,
                    CallSiteParameterArray &&parameters)
-      : CallEdge(return_pc, std::move(parameters)),
+      : CallEdge(return_pc, call_inst_pc, std::move(parameters)),
         call_target(std::move(call_target)) {}
 
   Function *GetCallee(ModuleList &images, ExecutionContext &exe_ctx) override;

diff  --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index a703b1e1217d..c98694fca6b5 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -3737,6 +3737,7 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
     llvm::Optional<DWARFDIE> call_origin;
     llvm::Optional<DWARFExpression> call_target;
     addr_t return_pc = LLDB_INVALID_ADDRESS;
+    addr_t call_inst_pc = LLDB_INVALID_ADDRESS;
 
     DWARFAttributes attributes;
     const size_t num_attributes = child.GetAttributes(attributes);
@@ -3765,6 +3766,12 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
       if (attr == DW_AT_call_return_pc)
         return_pc = form_value.Address();
 
+      // Extract DW_AT_call_pc (the PC at the call/branch instruction). It
+      // should only ever be unavailable for non-tail calls, in which case use
+      // LLDB_INVALID_ADDRESS.
+      if (attr == DW_AT_call_pc)
+        call_inst_pc = form_value.Address();
+
       // Extract DW_AT_call_target (the location of the address of the indirect
       // call).
       if (attr == DW_AT_call_target) {
@@ -3787,10 +3794,11 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
       continue;
     }
 
-    // Adjust the return PC. It needs to be fixed up if the main executable
+    // Adjust any PC forms. It needs to be fixed up if the main executable
     // contains a debug map (i.e. pointers to object files), because we need a
     // file address relative to the executable's text section.
     return_pc = FixupAddress(return_pc);
+    call_inst_pc = FixupAddress(call_inst_pc);
 
     // Extract call site parameters.
     CallSiteParameterArray parameters =
@@ -3798,10 +3806,13 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
 
     std::unique_ptr<CallEdge> edge;
     if (call_origin) {
-      LLDB_LOG(log, "CollectCallEdges: Found call origin: {0} (retn-PC: {1:x})",
-               call_origin->GetPubname(), return_pc);
+      LLDB_LOG(log,
+               "CollectCallEdges: Found call origin: {0} (retn-PC: {1:x}) "
+               "(call-PC: {2:x})",
+               call_origin->GetPubname(), return_pc, call_inst_pc);
       edge = std::make_unique<DirectCallEdge>(call_origin->GetMangledName(),
-                                              return_pc, std::move(parameters));
+                                              return_pc, call_inst_pc,
+                                              std::move(parameters));
     } else {
       if (log) {
         StreamString call_target_desc;
@@ -3810,8 +3821,8 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
         LLDB_LOG(log, "CollectCallEdges: Found indirect call target: {0}",
                  call_target_desc.GetString());
       }
-      edge = std::make_unique<IndirectCallEdge>(*call_target, return_pc,
-                                                std::move(parameters));
+      edge = std::make_unique<IndirectCallEdge>(
+          *call_target, return_pc, call_inst_pc, std::move(parameters));
     }
 
     if (log && parameters.size()) {

diff  --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp
index e1d5a720bcc3..0b1f6a8c3a3d 100644
--- a/lldb/source/Symbol/Function.cpp
+++ b/lldb/source/Symbol/Function.cpp
@@ -120,27 +120,36 @@ size_t InlineFunctionInfo::MemorySize() const {
 /// @name Call site related structures
 /// @{
 
-lldb::addr_t CallEdge::GetReturnPCAddress(Function &caller,
-                                          Target &target) const {
+lldb::addr_t CallEdge::GetLoadAddress(lldb::addr_t unresolved_pc,
+                                      Function &caller, Target &target) {
   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP));
 
   const Address &caller_start_addr = caller.GetAddressRange().GetBaseAddress();
 
   ModuleSP caller_module_sp = caller_start_addr.GetModule();
   if (!caller_module_sp) {
-    LLDB_LOG(log, "GetReturnPCAddress: cannot get Module for caller");
+    LLDB_LOG(log, "GetLoadAddress: cannot get Module for caller");
     return LLDB_INVALID_ADDRESS;
   }
 
   SectionList *section_list = caller_module_sp->GetSectionList();
   if (!section_list) {
-    LLDB_LOG(log, "GetReturnPCAddress: cannot get SectionList for Module");
+    LLDB_LOG(log, "GetLoadAddress: cannot get SectionList for Module");
     return LLDB_INVALID_ADDRESS;
   }
 
-  Address return_pc_addr = Address(return_pc, section_list);
-  lldb::addr_t ret_addr = return_pc_addr.GetLoadAddress(&target);
-  return ret_addr;
+  Address the_addr = Address(unresolved_pc, section_list);
+  lldb::addr_t load_addr = the_addr.GetLoadAddress(&target);
+  return load_addr;
+}
+
+lldb::addr_t CallEdge::GetReturnPCAddress(Function &caller,
+                                          Target &target) const {
+  return GetLoadAddress(return_pc, caller, target);
+}
+
+lldb::addr_t CallEdge::GetCallInstPC(Function &caller, Target &target) const {
+  return GetLoadAddress(call_inst_pc, caller, target);
 }
 
 void DirectCallEdge::ParseSymbolFileAndResolve(ModuleList &images) {

diff  --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp
index e8e72203e204..1a75986a80cf 100644
--- a/lldb/source/Target/StackFrameList.cpp
+++ b/lldb/source/Target/StackFrameList.cpp
@@ -236,13 +236,17 @@ void StackFrameList::GetOnlyConcreteFramesUpTo(uint32_t end_idx,
   m_frames.resize(num_frames);
 }
 
+/// A sequence of calls that comprise some portion of a backtrace. Each frame
+/// is represented as a pair of a callee (Function *) and an address within the
+/// callee.
+using CallSequence = std::vector<std::pair<Function *, addr_t>>;
+
 /// Find the unique path through the call graph from \p begin (with return PC
 /// \p return_pc) to \p end. On success this path is stored into \p path, and 
 /// on failure \p path is unchanged.
 static void FindInterveningFrames(Function &begin, Function &end,
                                   ExecutionContext &exe_ctx, Target &target,
-                                  addr_t return_pc,
-                                  std::vector<Function *> &path,
+                                  addr_t return_pc, CallSequence &path,
                                   ModuleList &images, Log *log) {
   LLDB_LOG(log, "Finding frames between {0} and {1}, retn-pc={2:x}",
            begin.GetDisplayName(), end.GetDisplayName(), return_pc);
@@ -275,24 +279,27 @@ static void FindInterveningFrames(Function &begin, Function &end,
   // Fully explore the set of functions reachable from the first edge via tail
   // calls in order to detect ambiguous executions.
   struct DFS {
-    std::vector<Function *> active_path = {};
-    std::vector<Function *> solution_path = {};
+    CallSequence active_path = {};
+    CallSequence solution_path = {};
     llvm::SmallPtrSet<Function *, 2> visited_nodes = {};
     bool ambiguous = false;
     Function *end;
     ModuleList &images;
+    Target ⌖
     ExecutionContext &context;
 
-    DFS(Function *end, ModuleList &images, ExecutionContext &context)
-        : end(end), images(images), context(context) {}
+    DFS(Function *end, ModuleList &images, Target &target,
+        ExecutionContext &context)
+        : end(end), images(images), target(target), context(context) {}
 
-    void search(Function &first_callee, std::vector<Function *> &path) {
-      dfs(first_callee);
+    void search(CallEdge &first_edge, Function &first_callee,
+                CallSequence &path) {
+      dfs(first_edge, first_callee);
       if (!ambiguous)
         path = std::move(solution_path);
     }
 
-    void dfs(Function &callee) {
+    void dfs(CallEdge &current_edge, Function &callee) {
       // Found a path to the target function.
       if (&callee == end) {
         if (solution_path.empty())
@@ -312,13 +319,16 @@ static void FindInterveningFrames(Function &begin, Function &end,
       }
 
       // Search the calls made from this callee.
-      active_path.push_back(&callee);
+      active_path.emplace_back(&callee, LLDB_INVALID_ADDRESS);
       for (const auto &edge : callee.GetTailCallingEdges()) {
         Function *next_callee = edge->GetCallee(images, context);
         if (!next_callee)
           continue;
 
-        dfs(*next_callee);
+        addr_t tail_call_pc = edge->GetCallInstPC(callee, target);
+        active_path.back().second = tail_call_pc;
+
+        dfs(*edge, *next_callee);
         if (ambiguous)
           return;
       }
@@ -326,7 +336,7 @@ static void FindInterveningFrames(Function &begin, Function &end,
     }
   };
 
-  DFS(&end, images, exe_ctx).search(*first_callee, path);
+  DFS(&end, images, target, exe_ctx).search(*first_edge, *first_callee, path);
 }
 
 /// Given that \p next_frame will be appended to the frame list, synthesize
@@ -379,7 +389,7 @@ void StackFrameList::SynthesizeTailCallFrames(StackFrame &next_frame) {
 
   // Try to find the unique sequence of (tail) calls which led from next_frame
   // to prev_frame.
-  std::vector<Function *> path;
+  CallSequence path;
   addr_t return_pc = next_reg_ctx_sp->GetPC();
   Target &target = *target_sp.get();
   ModuleList &images = next_frame.CalculateTarget()->GetImages();
@@ -389,14 +399,17 @@ void StackFrameList::SynthesizeTailCallFrames(StackFrame &next_frame) {
                         path, images, log);
 
   // Push synthetic tail call frames.
-  for (Function *callee : llvm::reverse(path)) {
+  for (auto calleeInfo : llvm::reverse(path)) {
+    Function *callee = calleeInfo.first;
     uint32_t frame_idx = m_frames.size();
     uint32_t concrete_frame_idx = next_frame.GetConcreteFrameIndex();
     addr_t cfa = LLDB_INVALID_ADDRESS;
     bool cfa_is_valid = false;
-    addr_t pc =
-        callee->GetAddressRange().GetBaseAddress().GetLoadAddress(&target);
-    constexpr bool behaves_like_zeroth_frame = false;
+    addr_t pc = calleeInfo.second;
+    // We do not want to subtract 1 from this PC, as it's the actual address
+    // of the tail-calling branch instruction. This address is provided by the
+    // compiler via DW_AT_call_pc.
+    constexpr bool behaves_like_zeroth_frame = true;
     SymbolContext sc;
     callee->CalculateSymbolContext(&sc);
     auto synth_frame = std::make_shared<StackFrame>(
@@ -404,7 +417,7 @@ void StackFrameList::SynthesizeTailCallFrames(StackFrame &next_frame) {
         cfa_is_valid, pc, StackFrame::Kind::Artificial,
         behaves_like_zeroth_frame, &sc);
     m_frames.push_back(synth_frame);
-    LLDB_LOG(log, "Pushed frame {0}", callee->GetDisplayName());
+    LLDB_LOG(log, "Pushed frame {0} at {1:x}", callee->GetDisplayName(), pc);
   }
 
   // If any frames were created, adjust next_frame's index.

diff  --git a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp
index c9ab74074f90..559f8a6d66aa 100644
--- a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp
+++ b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/main.cpp
@@ -3,19 +3,28 @@ volatile int x;
 void __attribute__((noinline)) sink() {
   x++; //% self.filecheck("bt", "main.cpp", "-implicit-check-not=artificial")
   // CHECK: frame #0: 0x{{[0-9a-f]+}} a.out`sink() at main.cpp:[[@LINE-1]]:4 [opt]
-  // CHECK-NEXT: frame #1: 0x{{[0-9a-f]+}} a.out`func3{{.*}} [opt] [artificial]
-  // CHECK-NEXT: frame #2: 0x{{[0-9a-f]+}} a.out`func2{{.*}} [opt]
-  // CHECK-NEXT: frame #3: 0x{{[0-9a-f]+}} a.out`func1{{.*}} [opt] [artificial]
+  // CHECK-NEXT: frame #1: 0x{{[0-9a-f]+}} a.out`func3() at main.cpp:14:3 [opt] [artificial]
+  // CHECK-NEXT: frame #2: 0x{{[0-9a-f]+}} a.out`func2() {{.*}} [opt]
+  // CHECK-NEXT: frame #3: 0x{{[0-9a-f]+}} a.out`func1() at main.cpp:23:3 [opt] [artificial]
   // CHECK-NEXT: frame #4: 0x{{[0-9a-f]+}} a.out`main{{.*}} [opt]
 }
 
-void __attribute__((noinline)) func3() { sink(); /* tail */ }
+void __attribute__((noinline)) func3() {
+  x++;
+  sink(); /* tail */
+}
 
-void __attribute__((disable_tail_calls, noinline)) func2() { func3(); /* regular */ }
+void __attribute__((disable_tail_calls, noinline)) func2() {
+  func3(); /* regular */
+}
 
-void __attribute__((noinline)) func1() { func2(); /* tail */ }
+void __attribute__((noinline)) func1() {
+  x++;
+  func2(); /* tail */
+}
 
 int __attribute__((disable_tail_calls)) main() {
+  // DEBUG: self.runCmd("log enable lldb step -f /tmp/lldbstep.log")
   func1(); /* regular */
   return 0;
 }


        


More information about the lldb-commits mailing list