[Lldb-commits] [lldb] r230955 - Fix handling of backslashes in Args parsing

Pavel Labath labath at google.com
Mon Mar 2 04:46:22 PST 2015


Author: labath
Date: Mon Mar  2 06:46:22 2015
New Revision: 230955

URL: http://llvm.org/viewvc/llvm-project?rev=230955&view=rev
Log:
Fix handling of backslashes in Args parsing

Summary:
Presently Args::SetCommandString allows quotes to be escaped with backslash. However, the
backslash itself is not removed from the argument, nor there is a way to escape the backslash
itself. This leads to surprising results:

"a b" c"   -> 'a b', 'c'  # Here we actually have an unterminated quote, but that is ignored
"a b\" c"  -> 'a b\" c'   # We try to escape the quote. That works but the backslash is not removed.
"a b\\" c" -> 'a b\\" c'  # Escaping the backslash has no effect.

This change changes quote handling to be more shell-like:
- single quotes and backquotes are literal and there is no way to escape the closing quote or
  anything else inside;
- inside double quotes you can use backslash to escape the closing quote and another backslash
- outside any quotes, you can use backslash to escape quotes, spaces and itself.

This makes the parsing more consistent with what the user is familiar and increases the
probability that pasting the command line from shell to the "process launch" command "just work".

Reviewers: clayborg

Subscribers: lldb-commits

Differential Revision: http://reviews.llvm.org/D7855

Added:
    lldb/trunk/test/settings/quoting/
    lldb/trunk/test/settings/quoting/Makefile
    lldb/trunk/test/settings/quoting/TestQuoting.py
    lldb/trunk/test/settings/quoting/main.c
Modified:
    lldb/trunk/include/lldb/Interpreter/Args.h
    lldb/trunk/source/Commands/CommandObjectExpression.cpp
    lldb/trunk/source/Commands/CommandObjectPlatform.cpp
    lldb/trunk/source/Commands/CommandObjectWatchpoint.cpp
    lldb/trunk/source/Interpreter/Args.cpp
    lldb/trunk/source/Interpreter/CommandInterpreter.cpp

Modified: lldb/trunk/include/lldb/Interpreter/Args.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/include/lldb/Interpreter/Args.h?rev=230955&r1=230954&r2=230955&view=diff
==============================================================================
--- lldb/trunk/include/lldb/Interpreter/Args.h (original)
+++ lldb/trunk/include/lldb/Interpreter/Args.h Mon Mar  2 06:46:22 2015
@@ -18,6 +18,7 @@
 #include <utility>
 
 // Other libraries and framework includes
+#include "llvm/ADT/StringRef.h"
 // Project includes
 #include "lldb/lldb-private-types.h"
 #include "lldb/lldb-types.h"
@@ -75,11 +76,9 @@ public:
     ///     A NULL terminated command that will be copied and split up
     ///     into arguments.
     ///
-    /// @see Args::SetCommandString(const char *)
+    /// @see Args::SetCommandString(llvm::StringRef)
     //------------------------------------------------------------------
-    Args (const char *command = NULL);
-
-    Args (const char *command, size_t len);
+    Args (llvm::StringRef command = llvm::StringRef());
 
     Args (const Args &rhs);
     
@@ -108,7 +107,7 @@ public:
     /// that can be accessed via the accessor functions.
     ///
     /// @param[in] command
-    ///     A NULL terminated command that will be copied and split up
+    ///     A command StringRef that will be copied and split up
     ///     into arguments.
     ///
     /// @see Args::GetArgumentCount() const
@@ -118,10 +117,7 @@ public:
     /// @see Args::Unshift (const char *)
     //------------------------------------------------------------------
     void
-    SetCommandString (const char *command);
-
-    void
-    SetCommandString (const char *command, size_t len);
+    SetCommandString (llvm::StringRef command);
 
     bool
     GetCommandString (std::string &command) const;
@@ -449,6 +445,9 @@ protected:
 
     void
     UpdateArgvFromArgs ();
+
+    llvm::StringRef
+    ParseSingleArgument (llvm::StringRef command);
 };
 
 } // namespace lldb_private

Modified: lldb/trunk/source/Commands/CommandObjectExpression.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Commands/CommandObjectExpression.cpp?rev=230955&r1=230954&r2=230955&view=diff
==============================================================================
--- lldb/trunk/source/Commands/CommandObjectExpression.cpp (original)
+++ lldb/trunk/source/Commands/CommandObjectExpression.cpp Mon Mar  2 06:46:22 2015
@@ -487,7 +487,7 @@ CommandObjectExpression::DoExecute
 
         if (end_options)
         {
-            Args args (command, end_options - command);
+            Args args (llvm::StringRef(command, end_options - command));
             if (!ParseOptions (args, result))
                 return false;
             

Modified: lldb/trunk/source/Commands/CommandObjectPlatform.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Commands/CommandObjectPlatform.cpp?rev=230955&r1=230954&r2=230955&view=diff
==============================================================================
--- lldb/trunk/source/Commands/CommandObjectPlatform.cpp (original)
+++ lldb/trunk/source/Commands/CommandObjectPlatform.cpp Mon Mar  2 06:46:22 2015
@@ -2142,7 +2142,7 @@ public:
             
             if (end_options)
             {
-                Args args (raw_command_line, end_options - raw_command_line);
+                Args args (llvm::StringRef(raw_command_line, end_options - raw_command_line));
                 if (!ParseOptions (args, result))
                     return false;
             }

Modified: lldb/trunk/source/Commands/CommandObjectWatchpoint.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Commands/CommandObjectWatchpoint.cpp?rev=230955&r1=230954&r2=230955&view=diff
==============================================================================
--- lldb/trunk/source/Commands/CommandObjectWatchpoint.cpp (original)
+++ lldb/trunk/source/Commands/CommandObjectWatchpoint.cpp Mon Mar  2 06:46:22 2015
@@ -1211,7 +1211,7 @@ protected:
             
             if (end_options)
             {
-                Args args (raw_command, end_options - raw_command);
+                Args args (llvm::StringRef(raw_command, end_options - raw_command));
                 if (!ParseOptions (args, result))
                     return false;
                 

Modified: lldb/trunk/source/Interpreter/Args.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Interpreter/Args.cpp?rev=230955&r1=230954&r2=230955&view=diff
==============================================================================
--- lldb/trunk/source/Interpreter/Args.cpp (original)
+++ lldb/trunk/source/Interpreter/Args.cpp Mon Mar  2 06:46:22 2015
@@ -33,25 +33,15 @@ using namespace lldb_private;
 //----------------------------------------------------------------------
 // Args constructor
 //----------------------------------------------------------------------
-Args::Args (const char *command) :
+Args::Args (llvm::StringRef command) :
     m_args(),
     m_argv(),
     m_args_quote_char()
 {
-    if (command)
-        SetCommandString (command);
+    SetCommandString (command);
 }
 
 
-Args::Args (const char *command, size_t len) :
-    m_args(),
-    m_argv(),
-    m_args_quote_char()
-{
-    if (command && len)
-        SetCommandString (command, len);
-}
-
 //----------------------------------------------------------------------
 // We have to be very careful on the copy constructor of this class
 // to make sure we copy all of the string values, but we can't copy the
@@ -146,214 +136,159 @@ Args::GetQuotedCommandString (std::strin
     return argc > 0;
 }
 
-void
-Args::SetCommandString (const char *command, size_t len)
+// A helper function for argument parsing.
+// Parses the initial part of the first argument using normal double quote rules:
+// backslash escapes the double quote and itself. The parsed string is appended to the second
+// argument. The function returns the unparsed portion of the string, starting at the closing
+// quote.
+static llvm::StringRef
+ParseDoubleQuotes(llvm::StringRef quoted, std::string &result)
 {
-    // Use std::string to make sure we get a NULL terminated string we can use
-    // as "command" could point to a string within a large string....
-    std::string null_terminated_command(command, len);
-    SetCommandString(null_terminated_command.c_str());
+    // Inside double quotes, '\' and '"' are special.
+    static const char *k_escapable_characters = "\"\\";
+    while (true)
+    {
+        // Skip over over regular characters and append them.
+        size_t regular = quoted.find_first_of(k_escapable_characters);
+        result += quoted.substr(0, regular);
+        quoted = quoted.substr(regular);
+
+        // If we have reached the end of string or the closing quote, we're done.
+        if (quoted.empty() || quoted.front() == '"')
+            break;
+
+        // We have found a backslash.
+        quoted = quoted.drop_front();
+
+        if (quoted.empty())
+        {
+            // A lone backslash at the end of string, let's just append it.
+            result += '\\';
+            break;
+        }
+
+        // If the character after the backslash is not a whitelisted escapable character, we
+        // leave the character sequence untouched.
+        if (strchr(k_escapable_characters, quoted.front()) == nullptr)
+            result += '\\';
+
+        result += quoted.front();
+        quoted = quoted.drop_front();
+    }
+
+    return quoted;
 }
 
-void
-Args::SetCommandString (const char *command)
+// A helper function for SetCommandString.
+// Parses a single argument from the command string, processing quotes and backslashes in a
+// shell-like manner. The parsed argument is appended to the m_args array. The function returns
+// the unparsed portion of the string, starting at the first unqouted, unescaped whitespace
+// character.
+llvm::StringRef
+Args::ParseSingleArgument(llvm::StringRef command)
 {
-    m_args.clear();
-    m_argv.clear();
-    m_args_quote_char.clear();
+    // Argument can be split into multiple discontiguous pieces,
+    // for example:
+    //  "Hello ""World"
+    // this would result in a single argument "Hello World" (without/
+    // the quotes) since the quotes would be removed and there is
+    // not space between the strings.
+
+    std::string arg;
+
+    // Since we can have multiple quotes that form a single command
+    // in a command like: "Hello "world'!' (which will make a single
+    // argument "Hello world!") we remember the first quote character
+    // we encounter and use that for the quote character.
+    char first_quote_char = '\0';
 
-    if (command && command[0])
+    bool arg_complete = false;
+    do
     {
-        static const char *k_space_separators = " \t";
-        static const char *k_escapable_characters = " \t\\'\"";
-        const char *arg_end = nullptr;
-        const char *arg_pos;
-        for (arg_pos = command;
-             arg_pos && arg_pos[0];
-             arg_pos = arg_end)
+        // Skip over over regular characters and append them.
+        size_t regular = command.find_first_of(" \t\"'`\\");
+        arg += command.substr(0, regular);
+        command = command.substr(regular);
+
+        if (command.empty())
+            break;
+
+        char special = command.front();
+        command = command.drop_front();
+        switch (special)
         {
-            // Skip any leading space separators
-            const char *arg_start = ::strspn (arg_pos, k_space_separators) + arg_pos;
-            
-            // If there were only space separators to the end of the line, then
-            // we're done.
-            if (*arg_start == '\0')
+        case '\\':
+            if (command.empty())
+            {
+                arg += '\\';
                 break;
+            }
 
-            // Arguments can be split into multiple discontiguous pieces,
-            // for example:
-            //  "Hello ""World"
-            // this would result in a single argument "Hello World" (without/
-            // the quotes) since the quotes would be removed and there is 
-            // not space between the strings. So we need to keep track of the
-            // current start of each argument piece in "arg_piece_start"
-            const char *arg_piece_start = arg_start;
-            arg_pos = arg_piece_start;
-
-            std::string arg;
-            // Since we can have multiple quotes that form a single command
-            // in a command like: "Hello "world'!' (which will make a single
-            // argument "Hello world!") we remember the first quote character
-            // we encounter and use that for the quote character.
-            char first_quote_char = '\0';
-            char quote_char = '\0';
-            bool arg_complete = false;
-
-            do
+            // If the character after the backslash is not a whitelisted escapable character, we
+            // leave the character sequence untouched.
+            static const char *k_escapable_characters = " \t\\'\"`";
+            if (strchr(k_escapable_characters, command.front()) == nullptr)
+                arg += '\\';
+
+            arg += command.front();
+            command.drop_front();
+
+            break;
+
+        case ' ':
+        case '\t':
+            // We are not inside any quotes, we just found a space after an
+            // argument. We are done.
+            arg_complete = true;
+            break;
+
+        case '"':
+        case '\'':
+        case '`':
+            // We found the start of a quote scope.
+            if (first_quote_char == '\0')
+                first_quote_char = special;
+
+            if (special == '"')
+                command = ParseDoubleQuotes(command, arg);
+            else
             {
-                arg_end = ::strcspn (arg_pos, k_escapable_characters) + arg_pos;
-
-                switch (arg_end[0])
-                {
-                default:
-                    assert (!"Unhandled case statement, we must handle this...");
-                    break;
-
-                case '\0':
-                    // End of C string
-                    if (arg_piece_start && arg_piece_start[0])
-                        arg.append (arg_piece_start);
-                    arg_complete = true;
-                    break;
-                case '\\':
-                    // Backslash character
-                    switch (arg_end[1])
-                    {
-                        case '\0':
-                            arg.append (arg_piece_start);
-                            ++arg_end;
-                            arg_complete = true;
-                            break;
-
-                        default:
-                            // Only consider this two-character sequence an escape sequence if we're unquoted and
-                            // the character after the backslash is a whitelisted escapable character.  Otherwise
-                            // leave the character sequence untouched.
-                            if (quote_char == '\0' && (nullptr != strchr(k_escapable_characters, arg_end[1])))
-                            {
-                                arg.append (arg_piece_start, arg_end - arg_piece_start);
-                                arg.append (arg_end + 1, 1);
-                                arg_pos = arg_end + 2;
-                                arg_piece_start = arg_pos;
-                            }
-                            else
-                                arg_pos = arg_end + 2;
-                            break;
-                    }
-                    break;
-                case '"':
-                case '\'':
-                case '`':
-                    // Quote characters 
-                    if (quote_char)
-                    {
-                        // We found a quote character while inside a quoted
-                        // character argument. If it matches our current quote
-                        // character, this ends the effect of the quotes. If it
-                        // doesn't we ignore it.
-                        if (quote_char == arg_end[0])
-                        {
-                            arg.append (arg_piece_start, arg_end - arg_piece_start);
-                            // Clear the quote character and let parsing
-                            // continue (we need to watch for things like:
-                            // "Hello ""World"
-                            // "Hello "World
-                            // "Hello "'World'
-                            // All of which will result in a single argument "Hello World"
-                            quote_char = '\0'; // Note that we are no longer inside quotes
-                            arg_pos = arg_end + 1; // Skip the quote character
-                            arg_piece_start = arg_pos; // Note we are starting from later in the string
-                        }
-                        else
-                        {
-                            // different quote, skip it and keep going
-                            arg_pos = arg_end + 1;
-                        }
-                    }
-                    else
-                    {
-                        // We found the start of a quote scope.
-                        // Make sure there isn't a string that precedes
-                        // the start of a quote scope like:
-                        // Hello" World"
-                        // If so, then add the "Hello" to the arg
-                        if (arg_end > arg_piece_start)
-                            arg.append (arg_piece_start, arg_end - arg_piece_start);
-                            
-                        // Enter into a quote scope
-                        quote_char = arg_end[0];
-                        
-                        if (first_quote_char == '\0')
-                            first_quote_char = quote_char;
-
-                        arg_pos = arg_end;
-                        ++arg_pos;                 // Skip the quote character
-                        arg_piece_start = arg_pos; // Note we are starting from later in the string
-                        
-                        // Skip till the next quote character
-                        const char *end_quote = ::strchr (arg_piece_start, quote_char);
-                        while (end_quote && end_quote[-1] == '\\')
-                        {
-                            // Don't skip the quote character if it is 
-                            // preceded by a '\' character
-                            end_quote = ::strchr (end_quote + 1, quote_char);
-                        }
-                        
-                        if (end_quote)
-                        {
-                            if (end_quote > arg_piece_start)
-                                arg.append (arg_piece_start, end_quote - arg_piece_start);
-
-                            // If the next character is a space or the end of 
-                            // string, this argument is complete...
-                            if (end_quote[1] == ' ' || end_quote[1] == '\t' || end_quote[1] == '\0')
-                            {
-                                arg_complete = true;
-                                arg_end = end_quote + 1;
-                            }
-                            else
-                            {
-                                arg_pos = end_quote + 1;
-                                arg_piece_start = arg_pos;
-                            }
-                            quote_char = '\0';
-                        }
-                        else
-                        {
-                            // Consume the rest of the string as there was no terminating quote
-                            arg.append(arg_piece_start);
-                            arg_end = arg_piece_start + strlen(arg_piece_start);
-                            arg_complete = true;
-                        }
-                    }
-                    break;
-
-                case ' ':
-                case '\t':
-                    if (quote_char)
-                    {
-                        // We are currently processing a quoted character and found
-                        // a space character, skip any spaces and keep trying to find
-                        // the end of the argument. 
-                        arg_pos = ::strspn (arg_end, k_space_separators) + arg_end;
-                    }
-                    else
-                    {
-                        // We are not inside any quotes, we just found a space after an
-                        // argument
-                        if (arg_end > arg_piece_start)
-                            arg.append (arg_piece_start, arg_end - arg_piece_start);
-                        arg_complete = true;
-                    }
-                    break;
-                }
-            } while (!arg_complete);
+                // For single quotes, we simply skip ahead to the matching quote character
+                // (or the end of the string).
+                size_t quoted = command.find(special);
+                arg += command.substr(0, quoted);
+                command = command.substr(quoted);
+            }
+
+            // If we found a closing quote, skip it.
+            if (! command.empty())
+                command = command.drop_front();
 
-            m_args.push_back(arg);
-            m_args_quote_char.push_back (first_quote_char);
+            break;
         }
-        UpdateArgvFromArgs();
+    } while (!arg_complete);
+
+    m_args.push_back(arg);
+    m_args_quote_char.push_back (first_quote_char);
+    return command;
+}
+
+void
+Args::SetCommandString (llvm::StringRef command)
+{
+    m_args.clear();
+    m_argv.clear();
+    m_args_quote_char.clear();
+
+    static const char *k_space_separators = " \t";
+    command = command.ltrim(k_space_separators);
+    while (!command.empty())
+    {
+        command = ParseSingleArgument(command);
+        command = command.ltrim(k_space_separators);
     }
+
+    UpdateArgvFromArgs();
 }
 
 void

Modified: lldb/trunk/source/Interpreter/CommandInterpreter.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Interpreter/CommandInterpreter.cpp?rev=230955&r1=230954&r2=230955&view=diff
==============================================================================
--- lldb/trunk/source/Interpreter/CommandInterpreter.cpp (original)
+++ lldb/trunk/source/Interpreter/CommandInterpreter.cpp Mon Mar  2 06:46:22 2015
@@ -1412,7 +1412,7 @@ CommandInterpreter::BuildAliasResult (co
                                       CommandReturnObject &result)
 {
     CommandObject *alias_cmd_obj = nullptr;
-    Args cmd_args (raw_input_string.c_str());
+    Args cmd_args (raw_input_string);
     alias_cmd_obj = GetCommandObject (alias_name);
     StreamString result_str;
     
@@ -2082,8 +2082,8 @@ CommandInterpreter::HandleCompletion (co
     // We parse the argument up to the cursor, so the last argument in parsed_line is
     // the one containing the cursor, and the cursor is after the last character.
 
-    Args parsed_line(current_line, last_char - current_line);
-    Args partial_parsed_line(current_line, cursor - current_line);
+    Args parsed_line(llvm::StringRef(current_line, last_char - current_line));
+    Args partial_parsed_line(llvm::StringRef(current_line, cursor - current_line));
 
     // Don't complete comments, and if the line we are completing is just the history repeat character, 
     // substitute the appropriate history line.

Added: lldb/trunk/test/settings/quoting/Makefile
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/test/settings/quoting/Makefile?rev=230955&view=auto
==============================================================================
--- lldb/trunk/test/settings/quoting/Makefile (added)
+++ lldb/trunk/test/settings/quoting/Makefile Mon Mar  2 06:46:22 2015
@@ -0,0 +1,5 @@
+LEVEL = ../../make
+
+C_SOURCES := main.c
+
+include $(LEVEL)/Makefile.rules

Added: lldb/trunk/test/settings/quoting/TestQuoting.py
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/test/settings/quoting/TestQuoting.py?rev=230955&view=auto
==============================================================================
--- lldb/trunk/test/settings/quoting/TestQuoting.py (added)
+++ lldb/trunk/test/settings/quoting/TestQuoting.py Mon Mar  2 06:46:22 2015
@@ -0,0 +1,82 @@
+"""
+Test quoting of arguments to lldb commands
+"""
+
+import os, time, re
+import unittest2
+import lldb
+from lldbtest import *
+
+class SettingsCommandTestCase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    @classmethod
+    def classCleanup(cls):
+        """Cleanup the test byproducts."""
+        cls.RemoveTempFile("stdout.txt")
+
+    def test_no_quote(self):
+        self.do_test_args("a b c", "a\0b\0c\0")
+
+    def test_single_quote(self):
+        self.do_test_args("'a b c'", "a b c\0")
+
+    def test_double_quote(self):
+        self.do_test_args('"a b c"', "a b c\0")
+
+    def test_double_quote(self):
+        self.do_test_args('"a b c"', 'a b c\0')
+
+    def test_single_quote_escape(self):
+        self.do_test_args("'a b\\' c", "a b\\\0c\0")
+
+    def test_double_quote_escape(self):
+        self.do_test_args('"a b\\" c"', 'a b" c\0')
+
+    def test_double_quote_escape(self):
+        self.do_test_args('"a b\\" c"', 'a b" c\0')
+
+    def test_double_quote_escape2(self):
+        self.do_test_args('"a b\\\\" c', 'a b\\\0c\0')
+
+    def test_double_quote_escape2(self):
+        self.do_test_args('"a b\\\\" c', 'a b\\\0c\0')
+
+    def test_single_in_double(self):
+        self.do_test_args('"a\'b"', "a'b\0")
+
+    def test_double_in_single(self):
+        self.do_test_args("'a\"b'", 'a"b\0')
+
+    def test_combined(self):
+        self.do_test_args('"a b"c\'d e\'', 'a bcd e\0')
+
+    def test_bare_single(self):
+        self.do_test_args("a\\'b", "a'b\0")
+
+    def test_bare_double(self):
+        self.do_test_args('a\\"b', 'a"b\0')
+
+    def do_test_args(self, args_in, args_out):
+        """Test argument parsing. Run the program with args_in. The program dumps its arguments
+        to stdout. Compare the stdout with args_out."""
+        self.buildDefault()
+
+        exe = os.path.join(os.getcwd(), "a.out")
+        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+        self.runCmd("process launch -o stdout.txt -- " + args_in)
+
+        with open('stdout.txt', 'r') as f:
+            output = f.read()
+
+        self.RemoveTempFile("stdout.txt")
+
+        self.assertEqual(output, args_out)
+
+if __name__ == '__main__':
+    import atexit
+    lldb.SBDebugger.Initialize()
+    atexit.register(lambda: lldb.SBDebugger.Terminate())
+    unittest2.main()

Added: lldb/trunk/test/settings/quoting/main.c
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/test/settings/quoting/main.c?rev=230955&view=auto
==============================================================================
--- lldb/trunk/test/settings/quoting/main.c (added)
+++ lldb/trunk/test/settings/quoting/main.c Mon Mar  2 06:46:22 2015
@@ -0,0 +1,13 @@
+#include <stdio.h>
+#include <string.h>
+
+/* This program writes its arguments (separated by '\0') to stdout. */
+int
+main(int argc, char const *argv[])
+{
+    int i;
+    for (i = 1; i < argc; ++i)
+        fwrite(argv[i], strlen(argv[i])+1, 1, stdout);
+
+    return 0;
+}





More information about the lldb-commits mailing list