[Lldb-commits] [lldb] 2a6dbed - [lldb] Fix (unintentional) recursion in CommandObjectRegexCommand

Jonas Devlieghere via lldb-commits lldb-commits at lists.llvm.org
Wed Feb 23 12:34:24 PST 2022


Author: Jonas Devlieghere
Date: 2022-02-23T12:34:14-08:00
New Revision: 2a6dbedf5a923512ba8b5c2d192b5fc8bb1210fd

URL: https://github.com/llvm/llvm-project/commit/2a6dbedf5a923512ba8b5c2d192b5fc8bb1210fd
DIFF: https://github.com/llvm/llvm-project/commit/2a6dbedf5a923512ba8b5c2d192b5fc8bb1210fd.diff

LOG: [lldb] Fix (unintentional) recursion in CommandObjectRegexCommand

Jim noticed that the regex command is unintentionally recursive. Let's
use the following command regex as an example:

  (lldb) com regex humm 's/([^ ]+) ([^ ]+)/p %1 %2 %1 %2/'

If we call it with arguments foo bar, thing behave as expected:

  (lldb) humm foo bar
  (...)
  foo bar foo bar

However, if we include %2 in the arguments, things break down:

  (lldb) humm fo%2o bar
  (...)
  fobaro bar fobaro bar

The problem is that the implementation of the substitution is too naive.
It substitutes the %1 token into the target template in place, then does
the %2 substitution starting with the resultant string. So if the
previous substitution introduced a %2 token, it would get processed in
the second sweep, etc.

This patch addresses the issue by walking the command once and
substituting the % variables in place.

  (lldb) humm fo%2o bar
  (...)
  fo%2o bar fo%2o bar

Furthermore, this patch also reports an error if not enough variables
were provided and add support for substituting %0.

rdar://81236994

Differential revision: https://reviews.llvm.org/D120101

Added: 
    lldb/unittests/Interpreter/TestRegexCommand.cpp

Modified: 
    lldb/include/lldb/Interpreter/CommandReturnObject.h
    lldb/source/Commands/CommandObjectRegexCommand.cpp
    lldb/source/Commands/CommandObjectRegexCommand.h
    lldb/source/Interpreter/CommandReturnObject.cpp
    lldb/unittests/Interpreter/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Interpreter/CommandReturnObject.h b/lldb/include/lldb/Interpreter/CommandReturnObject.h
index 72518a902144b..2177e721f9a40 100644
--- a/lldb/include/lldb/Interpreter/CommandReturnObject.h
+++ b/lldb/include/lldb/Interpreter/CommandReturnObject.h
@@ -15,6 +15,7 @@
 #include "lldb/lldb-private.h"
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/WithColor.h"
 
@@ -132,6 +133,8 @@ class CommandReturnObject {
 
   void SetError(const Status &error, const char *fallback_error_cstr = nullptr);
 
+  void SetError(llvm::Error error);
+
   lldb::ReturnStatus GetStatus() const;
 
   void SetStatus(lldb::ReturnStatus status);

diff  --git a/lldb/source/Commands/CommandObjectRegexCommand.cpp b/lldb/source/Commands/CommandObjectRegexCommand.cpp
index 7ddc5c0c7e083..a99a9e760cb26 100644
--- a/lldb/source/Commands/CommandObjectRegexCommand.cpp
+++ b/lldb/source/Commands/CommandObjectRegexCommand.cpp
@@ -10,6 +10,9 @@
 #include "lldb/Interpreter/CommandInterpreter.h"
 #include "lldb/Interpreter/CommandReturnObject.h"
 
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+
 using namespace lldb;
 using namespace lldb_private;
 
@@ -25,35 +28,53 @@ CommandObjectRegexCommand::CommandObjectRegexCommand(
 // Destructor
 CommandObjectRegexCommand::~CommandObjectRegexCommand() = default;
 
+llvm::Expected<std::string> CommandObjectRegexCommand::SubstituteVariables(
+    llvm::StringRef input,
+    const llvm::SmallVectorImpl<llvm::StringRef> &replacements) {
+  std::string buffer;
+  llvm::raw_string_ostream output(buffer);
+
+  llvm::SmallVector<llvm::StringRef, 4> parts;
+  input.split(parts, '%');
+
+  output << parts[0];
+  for (llvm::StringRef part : drop_begin(parts)) {
+    size_t idx = 0;
+    if (part.consumeInteger(10, idx))
+      output << '%';
+    else if (idx < replacements.size())
+      output << replacements[idx];
+    else
+      return llvm::make_error<llvm::StringError>(
+          llvm::formatv("%{0} is out of range: not enough arguments specified",
+                        idx),
+          llvm::errc::invalid_argument);
+    output << part;
+  }
+
+  return output.str();
+}
+
 bool CommandObjectRegexCommand::DoExecute(llvm::StringRef command,
                                           CommandReturnObject &result) {
   EntryCollection::const_iterator pos, end = m_entries.end();
   for (pos = m_entries.begin(); pos != end; ++pos) {
     llvm::SmallVector<llvm::StringRef, 4> matches;
     if (pos->regex.Execute(command, &matches)) {
-      std::string new_command(pos->command);
-      char percent_var[8];
-      size_t idx, percent_var_idx;
-      for (uint32_t match_idx = 1; match_idx <= m_max_matches; ++match_idx) {
-        if (match_idx < matches.size()) {
-          const std::string match_str = matches[match_idx].str();
-          const int percent_var_len =
-              ::snprintf(percent_var, sizeof(percent_var), "%%%u", match_idx);
-          for (idx = 0; (percent_var_idx = new_command.find(
-                             percent_var, idx)) != std::string::npos;) {
-            new_command.erase(percent_var_idx, percent_var_len);
-            new_command.insert(percent_var_idx, match_str);
-            idx = percent_var_idx + match_str.size();
-          }
-        }
+      llvm::Expected<std::string> new_command =
+          SubstituteVariables(pos->command, matches);
+      if (!new_command) {
+        result.SetError(new_command.takeError());
+        return false;
       }
+
       // Interpret the new command and return this as the result!
       if (m_interpreter.GetExpandRegexAliases())
-        result.GetOutputStream().Printf("%s\n", new_command.c_str());
+        result.GetOutputStream().Printf("%s\n", new_command->c_str());
       // Pass in true for "no context switching".  The command that called us
       // should have set up the context appropriately, we shouldn't have to
       // redo that.
-      return m_interpreter.HandleCommand(new_command.c_str(),
+      return m_interpreter.HandleCommand(new_command->c_str(),
                                          eLazyBoolCalculate, result);
     }
   }
@@ -61,10 +82,10 @@ bool CommandObjectRegexCommand::DoExecute(llvm::StringRef command,
   if (!GetSyntax().empty())
     result.AppendError(GetSyntax());
   else
-    result.GetOutputStream() << "Command contents '" << command
-                             << "' failed to match any "
-                                "regular expression in the '"
-                             << m_cmd_name << "' regex ";
+    result.GetErrorStream() << "Command contents '" << command
+                            << "' failed to match any "
+                               "regular expression in the '"
+                            << m_cmd_name << "' regex ";
   return false;
 }
 

diff  --git a/lldb/source/Commands/CommandObjectRegexCommand.h b/lldb/source/Commands/CommandObjectRegexCommand.h
index 2f65c2cd815d0..b2a375c63a764 100644
--- a/lldb/source/Commands/CommandObjectRegexCommand.h
+++ b/lldb/source/Commands/CommandObjectRegexCommand.h
@@ -39,6 +39,11 @@ class CommandObjectRegexCommand : public CommandObjectRaw {
 protected:
   bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override;
 
+  /// Substitute variables of the format %\d+ in the input string.
+  static llvm::Expected<std::string> SubstituteVariables(
+      llvm::StringRef input,
+      const llvm::SmallVectorImpl<llvm::StringRef> &replacements);
+
   struct Entry {
     RegularExpression regex;
     std::string command;

diff  --git a/lldb/source/Interpreter/CommandReturnObject.cpp b/lldb/source/Interpreter/CommandReturnObject.cpp
index 1b1e6996764c9..798dced0f1c6d 100644
--- a/lldb/source/Interpreter/CommandReturnObject.cpp
+++ b/lldb/source/Interpreter/CommandReturnObject.cpp
@@ -109,6 +109,11 @@ void CommandReturnObject::SetError(const Status &error,
   AppendError(error.AsCString(fallback_error_cstr));
 }
 
+void CommandReturnObject::SetError(llvm::Error error) {
+  if (error)
+    AppendError(llvm::toString(std::move(error)));
+}
+
 // Similar to AppendError, but do not prepend 'Status: ' to message, and don't
 // append "\n" to the end of it.
 

diff  --git a/lldb/unittests/Interpreter/CMakeLists.txt b/lldb/unittests/Interpreter/CMakeLists.txt
index 2080ce9085400..5b5268ffe9732 100644
--- a/lldb/unittests/Interpreter/CMakeLists.txt
+++ b/lldb/unittests/Interpreter/CMakeLists.txt
@@ -4,6 +4,7 @@ add_lldb_unittest(InterpreterTests
   TestOptionArgParser.cpp
   TestOptionValue.cpp
   TestOptionValueFileColonLine.cpp
+  TestRegexCommand.cpp
 
   LINK_LIBS
       lldbCore
@@ -14,4 +15,5 @@ add_lldb_unittest(InterpreterTests
       lldbUtilityHelpers
       lldbInterpreter
       lldbPluginPlatformMacOSX
+      LLVMTestingSupport
 )

diff  --git a/lldb/unittests/Interpreter/TestRegexCommand.cpp b/lldb/unittests/Interpreter/TestRegexCommand.cpp
new file mode 100644
index 0000000000000..ee30faab4814b
--- /dev/null
+++ b/lldb/unittests/Interpreter/TestRegexCommand.cpp
@@ -0,0 +1,68 @@
+//===-- TestRegexCommand.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Commands/CommandObjectRegexCommand.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace lldb_private;
+using namespace lldb;
+
+namespace {
+class TestRegexCommand : public CommandObjectRegexCommand {
+public:
+  using CommandObjectRegexCommand::SubstituteVariables;
+
+  static std::string
+  Substitute(llvm::StringRef input,
+             const llvm::SmallVectorImpl<llvm::StringRef> &replacements) {
+    llvm::Expected<std::string> str = SubstituteVariables(input, replacements);
+    if (!str)
+      return llvm::toString(str.takeError());
+    return *str;
+  }
+};
+} // namespace
+
+TEST(RegexCommandTest, SubstituteVariablesSuccess) {
+  const llvm::SmallVector<llvm::StringRef, 4> substitutions = {"all", "foo",
+                                                               "bar", "baz"};
+
+  EXPECT_EQ(TestRegexCommand::Substitute("%0", substitutions), "all");
+  EXPECT_EQ(TestRegexCommand::Substitute("%1", substitutions), "foo");
+  EXPECT_EQ(TestRegexCommand::Substitute("%2", substitutions), "bar");
+  EXPECT_EQ(TestRegexCommand::Substitute("%3", substitutions), "baz");
+  EXPECT_EQ(TestRegexCommand::Substitute("%1%2%3", substitutions), "foobarbaz");
+  EXPECT_EQ(TestRegexCommand::Substitute("#%1#%2#%3#", substitutions),
+            "#foo#bar#baz#");
+}
+
+TEST(RegexCommandTest, SubstituteVariablesFailed) {
+  const llvm::SmallVector<llvm::StringRef, 4> substitutions = {"all", "foo",
+                                                               "bar", "baz"};
+
+  ASSERT_THAT_EXPECTED(
+      TestRegexCommand::SubstituteVariables("%1%2%3%4", substitutions),
+      llvm::Failed());
+  ASSERT_THAT_EXPECTED(
+      TestRegexCommand::SubstituteVariables("%5", substitutions),
+      llvm::Failed());
+  ASSERT_THAT_EXPECTED(
+      TestRegexCommand::SubstituteVariables("%11", substitutions),
+      llvm::Failed());
+}
+
+TEST(RegexCommandTest, SubstituteVariablesNoRecursion) {
+  const llvm::SmallVector<llvm::StringRef, 4> substitutions = {"all", "%2",
+                                                               "%3", "%4"};
+  EXPECT_EQ(TestRegexCommand::Substitute("%0", substitutions), "all");
+  EXPECT_EQ(TestRegexCommand::Substitute("%1", substitutions), "%2");
+  EXPECT_EQ(TestRegexCommand::Substitute("%2", substitutions), "%3");
+  EXPECT_EQ(TestRegexCommand::Substitute("%3", substitutions), "%4");
+  EXPECT_EQ(TestRegexCommand::Substitute("%1%2%3", substitutions), "%2%3%4");
+}


        


More information about the lldb-commits mailing list