[Lldb-commits] [lldb] Reland "[lldb] Parse and display register field enums" (#97258)" (PR #97270)

via lldb-commits lldb-commits at lists.llvm.org
Mon Jul 1 02:46:24 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lldb

Author: David Spickett (DavidSpickett)

<details>
<summary>Changes</summary>

This reverts commit d9e659c538516036e40330b6a98160cbda4ff100.

I could not reproduce the Mac OS ASAN failure locally but I narrowed it down to the test `test_many_fields_same_enum`. This test shares an enum between x0, which is 64 bit, and cpsr, which is 32 bit.

My theory is that when it does `register read x0`, an enum type is created where the undlerying enumerators are 64 bit, matching the register size.

Then it does `register read cpsr` which used the cached enum type, but this register is 32 bit. This caused lldb to try to read an 8 byte value out of a 4 byte allocation:
READ of size 8 at 0x60200014b874 thread T0
<...>
=>0x60200014b800: fa fa fd fa fa fa fd fa fa fa fd fa fa fa[04]fa

To fix this I've added the register's size in bytes to the constructed enum type's name. This means that x0 uses: __lldb_register_fields_enum_some_enum_8
And cpsr uses:
__lldb_register_fields_enum_some_enum_4

If any other registers use this enum and are read, they will use the cached type as long as their size matches, otherwise we make a new type.

---

Patch is 35.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97270.diff


8 Files Affected:

- (modified) lldb/include/lldb/Target/RegisterFlags.h (+7) 
- (modified) lldb/source/Core/DumpRegisterInfo.cpp (+6-1) 
- (modified) lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp (+180-18) 
- (modified) lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h (+5) 
- (modified) lldb/source/Plugins/RegisterTypeBuilder/RegisterTypeBuilderClang.cpp (+42-3) 
- (modified) lldb/source/Target/RegisterFlags.cpp (+10) 
- (modified) lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py (+402) 
- (modified) lldb/unittests/Core/DumpRegisterInfoTest.cpp (+26) 


``````````diff
diff --git a/lldb/include/lldb/Target/RegisterFlags.h b/lldb/include/lldb/Target/RegisterFlags.h
index 1112972cf72e1..1250fd0330958 100644
--- a/lldb/include/lldb/Target/RegisterFlags.h
+++ b/lldb/include/lldb/Target/RegisterFlags.h
@@ -32,10 +32,15 @@ class FieldEnum {
         : m_value(value), m_name(std::move(name)) {}
 
     void ToXML(Stream &strm) const;
+
+    void DumpToLog(Log *log) const;
   };
 
   typedef std::vector<Enumerator> Enumerators;
 
+  // GDB also includes a "size" that is the size of the underlying register.
+  // We will not store that here but instead use the size of the register
+  // this gets attached to when emitting XML.
   FieldEnum(std::string id, const Enumerators &enumerators);
 
   const Enumerators &GetEnumerators() const { return m_enumerators; }
@@ -44,6 +49,8 @@ class FieldEnum {
 
   void ToXML(Stream &strm, unsigned size) const;
 
+  void DumpToLog(Log *log) const;
+
 private:
   std::string m_id;
   Enumerators m_enumerators;
diff --git a/lldb/source/Core/DumpRegisterInfo.cpp b/lldb/source/Core/DumpRegisterInfo.cpp
index 8334795416902..eccc6784cd497 100644
--- a/lldb/source/Core/DumpRegisterInfo.cpp
+++ b/lldb/source/Core/DumpRegisterInfo.cpp
@@ -111,6 +111,11 @@ void lldb_private::DoDumpRegisterInfo(
   };
   DumpList(strm, "    In sets: ", in_sets, emit_set);
 
-  if (flags_type)
+  if (flags_type) {
     strm.Printf("\n\n%s", flags_type->AsTable(terminal_width).c_str());
+
+    std::string enumerators = flags_type->DumpEnums(terminal_width);
+    if (enumerators.size())
+      strm << "\n\n" << enumerators;
+  }
 }
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index 060a30abee83e..604c92369e9a2 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -4179,21 +4179,134 @@ struct GdbServerTargetInfo {
   RegisterSetMap reg_set_map;
 };
 
-static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
-                                                          unsigned size) {
+static FieldEnum::Enumerators ParseEnumEvalues(const XMLNode &enum_node) {
+  Log *log(GetLog(GDBRLog::Process));
+  // We will use the last instance of each value. Also we preserve the order
+  // of declaration in the XML, as it may not be numerical.
+  // For example, hardware may intially release with two states that softwware
+  // can read from a register field:
+  // 0 = startup, 1 = running
+  // If in a future hardware release, the designers added a pre-startup state:
+  // 0 = startup, 1 = running, 2 = pre-startup
+  // Now it makes more sense to list them in this logical order as opposed to
+  // numerical order:
+  // 2 = pre-startup, 1 = startup, 0 = startup
+  // This only matters for "register info" but let's trust what the server
+  // chose regardless.
+  std::map<uint64_t, FieldEnum::Enumerator> enumerators;
+
+  enum_node.ForEachChildElementWithName(
+      "evalue", [&enumerators, &log](const XMLNode &enumerator_node) {
+        std::optional<llvm::StringRef> name;
+        std::optional<uint64_t> value;
+
+        enumerator_node.ForEachAttribute(
+            [&name, &value, &log](const llvm::StringRef &attr_name,
+                                  const llvm::StringRef &attr_value) {
+              if (attr_name == "name") {
+                if (attr_value.size())
+                  name = attr_value;
+                else
+                  LLDB_LOG(log, "ProcessGDBRemote::ParseEnumEvalues "
+                                "Ignoring empty name in evalue");
+              } else if (attr_name == "value") {
+                uint64_t parsed_value = 0;
+                if (llvm::to_integer(attr_value, parsed_value))
+                  value = parsed_value;
+                else
+                  LLDB_LOG(log,
+                           "ProcessGDBRemote::ParseEnumEvalues "
+                           "Invalid value \"{0}\" in "
+                           "evalue",
+                           attr_value.data());
+              } else
+                LLDB_LOG(log,
+                         "ProcessGDBRemote::ParseEnumEvalues Ignoring "
+                         "unknown attribute "
+                         "\"{0}\" in evalue",
+                         attr_name.data());
+
+              // Keep walking attributes.
+              return true;
+            });
+
+        if (value && name)
+          enumerators.insert_or_assign(
+              *value, FieldEnum::Enumerator(*value, name->str()));
+
+        // Find all evalue elements.
+        return true;
+      });
+
+  FieldEnum::Enumerators final_enumerators;
+  for (auto [_, enumerator] : enumerators)
+    final_enumerators.push_back(enumerator);
+
+  return final_enumerators;
+}
+
+static void
+ParseEnums(XMLNode feature_node,
+           llvm::StringMap<std::unique_ptr<FieldEnum>> &registers_enum_types) {
+  Log *log(GetLog(GDBRLog::Process));
+
+  // The top level element is "<enum...".
+  feature_node.ForEachChildElementWithName(
+      "enum", [log, &registers_enum_types](const XMLNode &enum_node) {
+        std::string id;
+
+        enum_node.ForEachAttribute([&id](const llvm::StringRef &attr_name,
+                                         const llvm::StringRef &attr_value) {
+          if (attr_name == "id")
+            id = attr_value;
+
+          // There is also a "size" attribute that is supposed to be the size in
+          // bytes of the register this applies to. However:
+          // * LLDB doesn't need this information.
+          // * It  is difficult to verify because you have to wait until the
+          //   enum is applied to a field.
+          //
+          // So we will emit this attribute in XML for GDB's sake, but will not
+          // bother ingesting it.
+
+          // Walk all attributes.
+          return true;
+        });
+
+        if (!id.empty()) {
+          FieldEnum::Enumerators enumerators = ParseEnumEvalues(enum_node);
+          if (!enumerators.empty()) {
+            LLDB_LOG(log,
+                     "ProcessGDBRemote::ParseEnums Found enum type \"{0}\"",
+                     id);
+            registers_enum_types.insert_or_assign(
+                id, std::make_unique<FieldEnum>(id, enumerators));
+          }
+        }
+
+        // Find all <enum> elements.
+        return true;
+      });
+}
+
+static std::vector<RegisterFlags::Field> ParseFlagsFields(
+    XMLNode flags_node, unsigned size,
+    const llvm::StringMap<std::unique_ptr<FieldEnum>> &registers_enum_types) {
   Log *log(GetLog(GDBRLog::Process));
   const unsigned max_start_bit = size * 8 - 1;
 
   // Process the fields of this set of flags.
   std::vector<RegisterFlags::Field> fields;
-  flags_node.ForEachChildElementWithName("field", [&fields, max_start_bit,
-                                                   &log](const XMLNode
-                                                             &field_node) {
+  flags_node.ForEachChildElementWithName("field", [&fields, max_start_bit, &log,
+                                                   &registers_enum_types](
+                                                      const XMLNode
+                                                          &field_node) {
     std::optional<llvm::StringRef> name;
     std::optional<unsigned> start;
     std::optional<unsigned> end;
+    std::optional<llvm::StringRef> type;
 
-    field_node.ForEachAttribute([&name, &start, &end, max_start_bit,
+    field_node.ForEachAttribute([&name, &start, &end, &type, max_start_bit,
                                  &log](const llvm::StringRef &attr_name,
                                        const llvm::StringRef &attr_value) {
       // Note that XML in general requires that each of these attributes only
@@ -4240,8 +4353,7 @@ static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
                    attr_value.data());
         }
       } else if (attr_name == "type") {
-        // Type is a known attribute but we do not currently use it and it is
-        // not required.
+        type = attr_value;
       } else {
         LLDB_LOG(
             log,
@@ -4254,14 +4366,55 @@ static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
     });
 
     if (name && start && end) {
-      if (*start > *end) {
+      if (*start > *end)
         LLDB_LOG(
             log,
             "ProcessGDBRemote::ParseFlagsFields Start {0} > end {1} in field "
             "\"{2}\", ignoring",
             *start, *end, name->data());
-      } else {
-        fields.push_back(RegisterFlags::Field(name->str(), *start, *end));
+      else {
+        if (RegisterFlags::Field::GetSizeInBits(*start, *end) > 64)
+          LLDB_LOG(log,
+                   "ProcessGDBRemote::ParseFlagsFields Ignoring field \"{2}\" "
+                   "that has "
+                   "size > 64 bits, this is not supported",
+                   name->data());
+        else {
+          // A field's type may be set to the name of an enum type.
+          const FieldEnum *enum_type = nullptr;
+          if (type && !type->empty()) {
+            auto found = registers_enum_types.find(*type);
+            if (found != registers_enum_types.end()) {
+              enum_type = found->second.get();
+
+              // No enumerator can exceed the range of the field itself.
+              uint64_t max_value =
+                  RegisterFlags::Field::GetMaxValue(*start, *end);
+              for (const auto &enumerator : enum_type->GetEnumerators()) {
+                if (enumerator.m_value > max_value) {
+                  enum_type = nullptr;
+                  LLDB_LOG(
+                      log,
+                      "ProcessGDBRemote::ParseFlagsFields In enum \"{0}\" "
+                      "evalue \"{1}\" with value {2} exceeds the maximum value "
+                      "of field \"{3}\" ({4}), ignoring enum",
+                      type->data(), enumerator.m_name, enumerator.m_value,
+                      name->data(), max_value);
+                  break;
+                }
+              }
+            } else {
+              LLDB_LOG(log,
+                       "ProcessGDBRemote::ParseFlagsFields Could not find type "
+                       "\"{0}\" "
+                       "for field \"{1}\", ignoring",
+                       type->data(), name->data());
+            }
+          }
+
+          fields.push_back(
+              RegisterFlags::Field(name->str(), *start, *end, enum_type));
+        }
       }
     }
 
@@ -4272,12 +4425,14 @@ static std::vector<RegisterFlags::Field> ParseFlagsFields(XMLNode flags_node,
 
 void ParseFlags(
     XMLNode feature_node,
-    llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types) {
+    llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types,
+    const llvm::StringMap<std::unique_ptr<FieldEnum>> &registers_enum_types) {
   Log *log(GetLog(GDBRLog::Process));
 
   feature_node.ForEachChildElementWithName(
       "flags",
-      [&log, &registers_flags_types](const XMLNode &flags_node) -> bool {
+      [&log, &registers_flags_types,
+       &registers_enum_types](const XMLNode &flags_node) -> bool {
         LLDB_LOG(log, "ProcessGDBRemote::ParseFlags Found flags node \"{0}\"",
                  flags_node.GetAttributeValue("id").c_str());
 
@@ -4310,7 +4465,7 @@ void ParseFlags(
         if (id && size) {
           // Process the fields of this set of flags.
           std::vector<RegisterFlags::Field> fields =
-              ParseFlagsFields(flags_node, *size);
+              ParseFlagsFields(flags_node, *size, registers_enum_types);
           if (fields.size()) {
             // Sort so that the fields with the MSBs are first.
             std::sort(fields.rbegin(), fields.rend());
@@ -4375,13 +4530,19 @@ void ParseFlags(
 bool ParseRegisters(
     XMLNode feature_node, GdbServerTargetInfo &target_info,
     std::vector<DynamicRegisterInfo::Register> &registers,
-    llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types) {
+    llvm::StringMap<std::unique_ptr<RegisterFlags>> &registers_flags_types,
+    llvm::StringMap<std::unique_ptr<FieldEnum>> &registers_enum_types) {
   if (!feature_node)
     return false;
 
   Log *log(GetLog(GDBRLog::Process));
 
-  ParseFlags(feature_node, registers_flags_types);
+  // Enums first because they are referenced by fields in the flags.
+  ParseEnums(feature_node, registers_enum_types);
+  for (const auto &enum_type : registers_enum_types)
+    enum_type.second->DumpToLog(log);
+
+  ParseFlags(feature_node, registers_flags_types, registers_enum_types);
   for (const auto &flags : registers_flags_types)
     flags.second->DumpToLog(log);
 
@@ -4643,7 +4804,7 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfoXMLAndProcess(
     if (arch_to_use.IsValid()) {
       for (auto &feature_node : feature_nodes) {
         ParseRegisters(feature_node, target_info, registers,
-                       m_registers_flags_types);
+                       m_registers_flags_types, m_registers_enum_types);
       }
 
       for (const auto &include : target_info.includes) {
@@ -4708,13 +4869,14 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfo(ArchSpec &arch_to_use) {
   if (!m_gdb_comm.GetQXferFeaturesReadSupported())
     return false;
 
-  // This holds register flags information for the whole of target.xml.
+  // These hold register type information for the whole of target.xml.
   // target.xml may include further documents that
   // GetGDBServerRegisterInfoXMLAndProcess will recurse to fetch and process.
   // That's why we clear the cache here, and not in
   // GetGDBServerRegisterInfoXMLAndProcess. To prevent it being cleared on every
   // include read.
   m_registers_flags_types.clear();
+  m_registers_enum_types.clear();
   std::vector<DynamicRegisterInfo::Register> registers;
   if (GetGDBServerRegisterInfoXMLAndProcess(arch_to_use, "target.xml",
                                             registers) &&
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
index 610a1ee0b34d2..b44ffefcd0d36 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
@@ -484,6 +484,11 @@ class ProcessGDBRemote : public Process,
   // entries are added. Which would invalidate any pointers set in the register
   // info up to that point.
   llvm::StringMap<std::unique_ptr<RegisterFlags>> m_registers_flags_types;
+
+  // Enum types are referenced by register fields. This does not store the data
+  // directly because the map may reallocate. Pointers to these are contained
+  // within instances of RegisterFlags.
+  llvm::StringMap<std::unique_ptr<FieldEnum>> m_registers_enum_types;
 };
 
 } // namespace process_gdb_remote
diff --git a/lldb/source/Plugins/RegisterTypeBuilder/RegisterTypeBuilderClang.cpp b/lldb/source/Plugins/RegisterTypeBuilder/RegisterTypeBuilderClang.cpp
index 067768537c068..1ecde7bee5820 100644
--- a/lldb/source/Plugins/RegisterTypeBuilder/RegisterTypeBuilderClang.cpp
+++ b/lldb/source/Plugins/RegisterTypeBuilder/RegisterTypeBuilderClang.cpp
@@ -43,8 +43,7 @@ CompilerType RegisterTypeBuilderClang::GetRegisterType(
       ScratchTypeSystemClang::GetForTarget(m_target);
   assert(type_system);
 
-  std::string register_type_name = "__lldb_register_fields_";
-  register_type_name += name;
+  std::string register_type_name = "__lldb_register_fields_" + name;
   // See if we have made this type before and can reuse it.
   CompilerType fields_type =
       type_system->GetTypeForIdentifier<clang::CXXRecordDecl>(
@@ -67,8 +66,48 @@ CompilerType RegisterTypeBuilderClang::GetRegisterType(
     // We assume that RegisterFlags has padded and sorted the fields
     // already.
     for (const RegisterFlags::Field &field : flags.GetFields()) {
+      CompilerType field_type = field_uint_type;
+
+      if (const FieldEnum *enum_type = field.GetEnum()) {
+        const FieldEnum::Enumerators &enumerators = enum_type->GetEnumerators();
+        if (!enumerators.empty()) {
+          // Enums can be used by many registers and the size of each register
+          // may be different. The register size is used as the underlying size
+          // of the enumerators, so we must make one enum type per register size
+          // it is used with.
+          std::string enum_type_name = "__lldb_register_fields_enum_" +
+                                       enum_type->GetID() + "_" +
+                                       std::to_string(byte_size);
+
+          // Enums can be used by mutiple fields and multiple registers, so we
+          // may have built this one already.
+          CompilerType field_enum_type =
+              type_system->GetTypeForIdentifier<clang::EnumDecl>(
+                  enum_type_name);
+
+          if (field_enum_type)
+            field_type = field_enum_type;
+          else {
+            field_type = type_system->CreateEnumerationType(
+                enum_type_name, type_system->GetTranslationUnitDecl(),
+                OptionalClangModuleID(), Declaration(), field_uint_type, false);
+
+            type_system->StartTagDeclarationDefinition(field_type);
+
+            Declaration decl;
+            for (auto enumerator : enumerators) {
+              type_system->AddEnumerationValueToEnumerationType(
+                  field_type, decl, enumerator.m_name.c_str(),
+                  enumerator.m_value, byte_size * 8);
+            }
+
+            type_system->CompleteTagDeclarationDefinition(field_type);
+          }
+        }
+      }
+
       type_system->AddFieldToRecordType(fields_type, field.GetName(),
-                                        field_uint_type, lldb::eAccessPublic,
+                                        field_type, lldb::eAccessPublic,
                                         field.GetSizeInBits());
     }
 
diff --git a/lldb/source/Target/RegisterFlags.cpp b/lldb/source/Target/RegisterFlags.cpp
index 476150251221a..976e03870ad9e 100644
--- a/lldb/source/Target/RegisterFlags.cpp
+++ b/lldb/source/Target/RegisterFlags.cpp
@@ -366,6 +366,16 @@ void FieldEnum::Enumerator::ToXML(Stream &strm) const {
               escaped_name.c_str(), m_value);
 }
 
+void FieldEnum::Enumerator::DumpToLog(Log *log) const {
+  LLDB_LOG(log, "  Name: \"{0}\" Value: {1}", m_name.c_str(), m_value);
+}
+
+void FieldEnum::DumpToLog(Log *log) const {
+  LLDB_LOG(log, "ID: \"{0}\"", m_id.c_str());
+  for (const auto &enumerator : GetEnumerators())
+    enumerator.DumpToLog(log);
+}
+
 void RegisterFlags::ToXML(Stream &strm) const {
   // Example XML:
   // <flags id="cpsr_flags" size="4">
diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py b/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py
index e2c75970c2d2e..2dbb2b5f5e3a9 100644
--- a/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py
+++ b/lldb/test/API/functionalities/gdb_remote_client/TestXMLRegisterFlags.py
@@ -654,3 +654,405 @@ def test_flags_name_xml_reserved_characters(self):
             "register info cpsr",
             substrs=["| A< | B> | C' | D\" | E& |"],
         )
+
+    @skipIfXmlSupportMissing
+    @skipIfRemote
+    def test_no_enum(self):
+        """Check that lldb does not try to print an enum when there isn't one."""
+
+        self.setup_flags_test('<field name="E" start="0" end="0">' "</field>")
+
+        self.expect("register info cpsr", patterns=["E:.*$"], matching=False)
+
+    @skipIfXmlSupportMissing
+    @skipIfRemote
+    def test_enum_type_not_found(self):
+        """Check that lldb uses the default format if we don't find the enum type."""
+        self.setup_register_test(
+            """\
+          <flags id="cpsr_flags" size="4">
+            <field name="E" start="0" end="0" type="some_enum"/>
+          </flags>
+          <reg name="pc" bitsize="64"/>
+          <reg name="x0" regnum="0" bitsize="64"/>
+          <reg name="cpsr" regnum="33" bitsize="32" type="cpsr_flags"/>"""
+        )
+
+        self.expect("register read cpsr", patterns=["\(E = 1\)$"])
+
+    @skipIfXmlSupportMissing
+    @skipIfRemote
+    def test_enum_duplicated_evalue(self):
+        """Check that lldb only uses the last insta...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/97270


More information about the lldb-commits mailing list