[Lldb-commits] [lldb] r122166 - in /lldb/trunk: include/lldb/Core/RegularExpression.h source/Core/RegularExpression.cpp source/Core/UserSettingsController.cpp source/Interpreter/Args.cpp

Greg Clayton gclayton at apple.com
Sat Dec 18 19:41:24 PST 2010


Author: gclayton
Date: Sat Dec 18 21:41:24 2010
New Revision: 122166

URL: http://llvm.org/viewvc/llvm-project?rev=122166&view=rev
Log:
Improved our argument parsing so that it handles quoting more like a shell
would. A few examples that we now handle correctly:

INPUT: "Hello "world
OUTPUT: "Hello world"

INPUT: "Hello "' World'
OUTPUT: "Hello World"

INPUT: Hello" World"
OUTPUT: "Hello World"

The new quote handling broke the setting of dictionary values for the
"settings set" command for things like:

(lldb) settings set target.process.env-vars ["MY_ENV_VAR"]=YES

since we would drop the quotes. I fixed the user settings controller to use
a regular expression so it can accept any of the following inputs for
dictionary setting:

settings set target.process.env-vars ["MY_ENV_VAR"]=YES
settings set target.process.env-vars [MY_ENV_VAR]=YES
settings set target.process.env-vars MY_ENV_VAR=YES

We might want to eventually drop the first two syntaxes, but I won't make
that decision right now.

This allows more natural setting of the environment variables:

settings set target.process.env-vars MY_ENV_VAR=YES ABC=DEF CWD=/tmp





Modified:
    lldb/trunk/include/lldb/Core/RegularExpression.h
    lldb/trunk/source/Core/RegularExpression.cpp
    lldb/trunk/source/Core/UserSettingsController.cpp
    lldb/trunk/source/Interpreter/Args.cpp

Modified: lldb/trunk/include/lldb/Core/RegularExpression.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/include/lldb/Core/RegularExpression.h?rev=122166&r1=122165&r2=122166&view=diff
==============================================================================
--- lldb/trunk/include/lldb/Core/RegularExpression.h (original)
+++ lldb/trunk/include/lldb/Core/RegularExpression.h Sat Dec 18 21:41:24 2010
@@ -101,10 +101,6 @@
     /// @param[in] match_count
     ///     The number of regmatch_t objects in \a match_ptr
     ///
-    /// @param[out] match_ptr
-    ///     A pointer to at least \a match_count regmatch_t objects
-    ///     if \a match_count is non-zero.
-    ///
     /// @param[in] execute_flags
     ///     Flags to pass to the \c regexec() function.
     ///

Modified: lldb/trunk/source/Core/RegularExpression.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Core/RegularExpression.cpp?rev=122166&r1=122165&r2=122166&view=diff
==============================================================================
--- lldb/trunk/source/Core/RegularExpression.cpp (original)
+++ lldb/trunk/source/Core/RegularExpression.cpp Sat Dec 18 21:41:24 2010
@@ -100,7 +100,7 @@
         match_result = ::regexec (&m_preg,
                                   s,
                                   m_matches.size(),
-                                  &m_matches.front(),
+                                  &m_matches[0],
                                   execute_flags);
     }
     return match_result == 0;
@@ -111,9 +111,18 @@
 {
     if (idx <= m_preg.re_nsub && idx < m_matches.size())
     {
-        match_str.assign (s + m_matches[idx].rm_so,
-                          m_matches[idx].rm_eo - m_matches[idx].rm_so);
-        return true;
+        if (m_matches[idx].rm_eo == m_matches[idx].rm_so)
+        {
+            // Matched the empty string...
+            match_str.clear();
+            return true;
+        }
+        else if (m_matches[idx].rm_eo > m_matches[idx].rm_so)
+        {
+            match_str.assign (s + m_matches[idx].rm_so,
+                              m_matches[idx].rm_eo - m_matches[idx].rm_so);
+            return true;
+        }
     }
     return false;
 }

Modified: lldb/trunk/source/Core/UserSettingsController.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Core/UserSettingsController.cpp?rev=122166&r1=122165&r2=122166&view=diff
==============================================================================
--- lldb/trunk/source/Core/UserSettingsController.cpp (original)
+++ lldb/trunk/source/Core/UserSettingsController.cpp Sat Dec 18 21:41:24 2010
@@ -12,6 +12,7 @@
 
 #include "lldb/Core/UserSettingsController.h"
 #include "lldb/Core/Error.h"
+#include "lldb/Core/RegularExpression.h"
 #include "lldb/Core/Stream.h"
 #include "lldb/Core/StreamString.h"
 #include "lldb/Interpreter/CommandInterpreter.h"
@@ -2102,34 +2103,32 @@
                 }
                 Args args (new_value);
                 size_t num_args = args.GetArgumentCount();
+                RegularExpression regex("(\\[\"?)?"                 // Regex match 1 (optional key prefix of '["' pr '[')
+                                        "([A-Za-z_][A-Za-z_0-9]*)"  // Regex match 2 (key string)
+                                        "(\"?\\])?"                 // Regex match 3 (optional key suffix of '"]' pr ']')
+                                        "="                         // The equal sign that is required
+                                        "(.*)");                    // Regex match 4 (value string)
+                std::string key, value;
+
                 for (size_t i = 0; i < num_args; ++i)
                 {
-                    std::string tmp_arg = args.GetArgumentAtIndex (i);
-                    size_t eq_sign = tmp_arg.find ('=');
-                    if (eq_sign != std::string::npos)
+                    const char *key_equal_value_arg = args.GetArgumentAtIndex (i);
+                    // Execute the regular expression on each arg.
+                    if (regex.Execute(key_equal_value_arg, 5))
                     {
-                        if (eq_sign > 4)
-                        {
-                            std::string tmp_key = tmp_arg.substr (0, eq_sign);
-                            std::string real_value = tmp_arg.substr (eq_sign+1);
-                            if ((tmp_key[0] == '[')
-                                && (tmp_key[1] == '"')
-                                && (tmp_key[eq_sign-2] == '"')
-                                && (tmp_key[eq_sign-1] == ']'))
-                              {
-                                std::string real_key = tmp_key.substr (2, eq_sign-4); 
-                                dictionary[real_key] = real_value;
-                              }
-                            else
-                                err.SetErrorString ("Invalid key format for dictionary assignment.  "
-                                                    "Expected '[\"<key>\"]'\n");
-                        }
-                        else
-                            err.SetErrorString ("Invalid key format for dictionary assignment.  "
-                                                "Expected '[\"<key>\"]'\n");
+                        // The regular expression succeeded. The match at index
+                        // zero will be the entire string that matched the entire
+                        // regular expression. The match at index 1 - 4 will be
+                        // as mentioned above by the creation of the regex pattern.
+                        // Match index 2 is the key, match index 4 is the value.
+                        regex.GetMatchAtIndex (key_equal_value_arg, 2, key);
+                        regex.GetMatchAtIndex (key_equal_value_arg, 4, value);
+                        dictionary[key] = value;
                     }
                     else
-                        err.SetErrorString ("Invalid format for dictionary value.  Expected '[\"<key>\"]=<value>'\n");
+                    {
+                        err.SetErrorString ("Invalid format for dictionary value.  Expected one of '[\"<key>\"]=<value>', '[<key>]=<value>', or '<key>=<value>'\n");
+                    }
                 }
             }  
             break;

Modified: lldb/trunk/source/Interpreter/Args.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Interpreter/Args.cpp?rev=122166&r1=122165&r2=122166&view=diff
==============================================================================
--- lldb/trunk/source/Interpreter/Args.cpp (original)
+++ lldb/trunk/source/Interpreter/Args.cpp Sat Dec 18 21:41:24 2010
@@ -23,10 +23,6 @@
 using namespace lldb;
 using namespace lldb_private;
 
-static const char *k_space_characters = "\t\n\v\f\r ";
-static const char *k_space_characters_with_slash = "\t\n\v\f\r \\";
-
-
 //----------------------------------------------------------------------
 // Args constructor
 //----------------------------------------------------------------------
@@ -34,7 +30,8 @@
     m_args(),
     m_argv()
 {
-    SetCommandString (command);
+    if (command)
+        SetCommandString (command);
 }
 
 
@@ -42,11 +39,10 @@
     m_args(),
     m_argv()
 {
-    SetCommandString (command, len);
+    if (command && len)
+        SetCommandString (command, len);
 }
 
-
-
 //----------------------------------------------------------------------
 // Destructor
 //----------------------------------------------------------------------
@@ -97,20 +93,20 @@
 Args::GetQuotedCommandString (std::string &command)
 {
     command.clear ();
-    int argc = GetArgumentCount ();
-    for (int i = 0; i < argc; ++i)
+    size_t argc = GetArgumentCount ();
+    for (size_t i = 0; i < argc; ++i)
     {
         if (i > 0)
-            command += ' ';
-        char quote_char = m_args_quote_char[i];
-        if (quote_char != '\0')
-        {
-            command += quote_char;
-            command += m_argv[i];
-            command += quote_char;
+            command.append (1, ' ');
+        char quote_char = GetArgumentQuoteCharAtIndex(i);
+        if (quote_char)
+        {
+            command.append (1, quote_char);
+            command.append (m_argv[i]);
+            command.append (1, quote_char);
         }
         else
-            command += m_argv[i];
+            command.append (m_argv[i]);
     }
     return argc > 0;
 }
@@ -127,136 +123,197 @@
 void
 Args::SetCommandString (const char *command)
 {
+    StreamFile s(stdout);
+    s.Printf("\nCOMMAND: %s\n", command);
     m_args.clear();
     m_argv.clear();
+    m_args_quote_char.clear();
+
     if (command && command[0])
     {
-        const char *arg_start;
-        const char *next_arg_start;
-        for (arg_start = command, next_arg_start = NULL;
-             arg_start && arg_start[0];
-             arg_start = next_arg_start, next_arg_start = NULL)
+        static const char *k_space_separators = " \t";
+        static const char *k_space_separators_with_slash_and_quotes = " \t \\'\"`";
+        const char *arg_end = NULL;
+        const char *arg_pos;
+        for (arg_pos = command;
+             arg_pos && arg_pos[0];
+             arg_pos = arg_end)
         {
-            // Skip any leading space characters
-            arg_start = ::strspn (arg_start, k_space_characters) + arg_start;
-
-            // If there were only space characters to the end of the line, then
+            // Skip any leading space separators
+            const char *arg_start = ::strspn (arg_pos, k_space_separators) + arg_pos;
+            
+            // If there were only space separators to the end of the line, then
             // we're done.
             if (*arg_start == '\0')
                 break;
 
+            // Arguments can be split into multiple discontiguous pieces,
+            // for example:
+            //  "Hello ""World"
+            // this would result in a single argument "Hello World" (without
+            // the quotes) since the quotes would be removed and there is
+            // no space between the strings. So we need to keep track of the
+            // current start of each argument piece in "arg_piece_start"
+            const char *arg_piece_start = arg_start;
+            arg_pos = arg_piece_start;
+
             std::string arg;
-            const char *arg_end = NULL;
+            // Since we can have multiple quotes that form a single command
+            // in a command like: "Hello "world'!' (which will make a single
+            // argument "Hello world!") we remember the first quote character
+            // we encounter and use that for the quote character.
+            char first_quote_char = '\0';
+            char quote_char = '\0';
+            bool arg_complete = false;
 
-            switch (*arg_start)
+            do
             {
-            case '\'':
-            case '"':
-            case '`':
+                arg_end = ::strcspn (arg_pos, k_space_separators_with_slash_and_quotes) + arg_pos;
+
+                switch (arg_end[0])
                 {
-                    // Look for either a quote character, or the backslash
-                    // character
-                    const char quote_char = *arg_start;
-                    char find_chars[3] = { quote_char, '\\' , '\0'};
-                    bool is_backtick = (quote_char == '`');
-                    if (quote_char == '"' || quote_char == '`')
-                        m_args_quote_char.push_back(quote_char);
-                    else
-                        m_args_quote_char.push_back('\0');
+                default:
+                    assert (!"Unhandled case statement, we must handle this...");
+                    break;
 
-                    while (*arg_start != '\0')
+                case '\0':
+                    // End of C string
+                    if (arg_piece_start && arg_piece_start[0])
+                        arg.append (arg_piece_start);
+                    arg_complete = true;
+                    break;
+                    
+                case '\\':
+                    // Backslash character
+                    switch (arg_end[1])
                     {
-                        arg_end = ::strcspn (arg_start + 1, find_chars) + arg_start + 1;
+                        case '\0':
+                            arg.append (arg_piece_start);
+                            arg_complete = true;
+                            break;
 
-                        if (*arg_end == '\0')
-                        {
-                            arg.append (arg_start);
+                        default:
+                            arg_pos = arg_end + 2;
                             break;
+                    }
+                    break;
+                
+                case '"':
+                case '\'':
+                case '`':
+                    // Quote characters 
+                    if (quote_char)
+                    {
+                        // We found a quote character while inside a quoted
+                        // character argument. If it matches our current quote
+                        // character, this ends the effect of the quotes. If it
+                        // doesn't we ignore it.
+                        if (quote_char == arg_end[0])
+                        {
+                            arg.append (arg_piece_start, arg_end - arg_piece_start);
+                            // Clear the quote character and let parsing
+                            // continue (we need to watch for things like:
+                            // "Hello ""World"
+                            // "Hello "World
+                            // "Hello "'World'
+                            // All of which will result in a single argument "Hello World"
+                            quote_char = '\0'; // Note that we are no longer inside quotes
+                            arg_pos = arg_end + 1; // Skip the quote character
+                            arg_piece_start = arg_pos; // Note we are starting from later in the string
                         }
-
-                        // Watch out for quote characters prefixed with '\'
-                        if (*arg_end == '\\')
+                        else
                         {
-                            if (arg_end[1] == quote_char)
+                            // different quote, skip it and keep going
+                            arg_pos = arg_end + 1;
+                        }
+                    }
+                    else
+                    {
+                        // We found the start of a quote scope.
+                        // Make sure there isn't a string that precedes
+                        // the start of a quote scope like:
+                        // Hello" World"
+                        // If so, then add the "Hello" to the arg
+                        if (arg_end > arg_piece_start)
+                            arg.append (arg_piece_start, arg_end - arg_piece_start);
+                            
+                        // Enter into a quote scope
+                        quote_char = arg_end[0];
+                        
+                        if (first_quote_char == '\0')
+                            first_quote_char = quote_char;
+
+                        arg_pos = arg_end;
+                        
+                        if (quote_char != '`')
+                            ++arg_pos; // Skip the quote character if it is not a backtick
+
+                        arg_piece_start = arg_pos; // Note we are starting from later in the string
+                        
+                        // Skip till the next quote character
+                        const char *end_quote = ::strchr (arg_piece_start, quote_char);
+                        while (end_quote && end_quote[-1] == '\\')
+                        {
+                            // Don't skip the quote character if it is 
+                            // preceded by a '\' character
+                            end_quote = ::strchr (end_quote + 1, quote_char);
+                        }
+                        
+                        if (end_quote)
+                        {
+                            if (end_quote > arg_piece_start)
                             {
-                                // The character following the '\' is our quote
-                                // character so strip the backslash character
-                                arg.append (arg_start, arg_end);
+                                // Keep the backtick quote on commands
+                                if (quote_char == '`')
+                                    arg.append (arg_piece_start, end_quote + 1 - arg_piece_start);
+                                else
+                                    arg.append (arg_piece_start, end_quote - arg_piece_start);
+                            }
+
+                            // If the next character is a space or the end of 
+                            // string, this argument is complete...
+                            if (end_quote[1] == ' ' || end_quote[1] == '\t' || end_quote[1] == '\0')
+                            {
+                                arg_complete = true;
+                                arg_end = end_quote + 1;
                             }
                             else
                             {
-                                // The character following the '\' is NOT our
-                                // quote character, so include the backslash
-                                // and continue
-                                arg.append (arg_start, arg_end + 1);
+                                arg_pos = end_quote + 1;
+                                arg_piece_start = arg_pos;
                             }
-                            arg_start = arg_end + 1;
-                            continue;
-                        }
-                        else
-                        {
-                            arg.append (arg_start, arg_end + 1);
-                            next_arg_start = arg_end + 1;
-                            break;
+                            quote_char = '\0';
                         }
                     }
+                    break;
 
-                    // Skip single and double quotes, but leave backtick quotes
-                    if (!is_backtick)
+                case ' ':
+                case '\t':
+                    if (quote_char)
                     {
-                        char first_c = arg[0];
-                        arg.erase(0,1);
-                        // Only erase the last character if it is the same as the first.
-                        // Otherwise, we're parsing an incomplete command line, and we
-                        // would be stripping off the last character of that string.
-                        if (arg[arg.size() - 1] == first_c)
-                            arg.erase(arg.size() - 1, 1);
+                        // We are currently processing a quoted character and found
+                        // a space character, skip any spaces and keep trying to find
+                        // the end of the argument. 
+                        arg_pos = ::strspn (arg_end, k_space_separators) + arg_end;
                     }
-                }
-                break;
-            default:
-                {
-                    m_args_quote_char.push_back('\0');
-                    // Look for the next non-escaped space character
-                    while (*arg_start != '\0')
+                    else
                     {
-                        arg_end = ::strcspn (arg_start, k_space_characters_with_slash) + arg_start;
-
-                        if (arg_end == NULL)
-                        {
-                            arg.append(arg_start);
-                            break;
-                        }
-
-                        if (*arg_end == '\\')
-                        {
-                            // Append up to the '\' char
-                            arg.append (arg_start, arg_end);
-
-                            if (arg_end[1] == '\0')
-                                break;
-
-                            // Append the character following the '\' if it isn't
-                            // the end of the string
-                            arg.append (1, arg_end[1]);
-                            arg_start = arg_end + 2;
-                            continue;
-                        }
-                        else
-                        {
-                            arg.append (arg_start, arg_end);
-                            next_arg_start = arg_end;
-                            break;
-                        }
+                        // We are not inside any quotes, we just found a space after an
+                        // argument
+                        if (arg_end > arg_piece_start)
+                            arg.append (arg_piece_start, arg_end - arg_piece_start);
+                        arg_complete = true;
                     }
+                    break;
                 }
-                break;
-            }
+            } while (!arg_complete);
 
             m_args.push_back(arg);
+            m_args_quote_char.push_back (first_quote_char);
         }
+        UpdateArgvFromArgs();
     }
-    UpdateArgvFromArgs();
+    Dump (&s);
 }
 
 void
@@ -309,6 +366,9 @@
     for (pos = m_args.begin(); pos != end; ++pos)
         m_argv.push_back(pos->c_str());
     m_argv.push_back(NULL);
+    // Make sure we have enough arg quote chars in the array
+    if (m_args_quote_char.size() < m_args.size())
+        m_args_quote_char.resize (m_argv.size());
 }
 
 size_t
@@ -359,7 +419,8 @@
     {
         m_argv.erase(m_argv.begin());
         m_args.pop_front();
-        m_args_quote_char.erase(m_args_quote_char.begin());
+        if (!m_args_quote_char.empty())
+            m_args_quote_char.erase(m_args_quote_char.begin());
     }
 }
 
@@ -399,8 +460,13 @@
 
     pos = m_args.insert(pos, arg_cstr);
     
-
-    m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char);
+    if (idx >= m_args_quote_char.size())
+    {
+        m_args_quote_char.resize(idx + 1);
+        m_args_quote_char[idx] = quote_char;
+    }
+    else
+        m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char);
     
     UpdateArgvFromArgs();
     return GetArgumentAtIndex(idx);
@@ -422,6 +488,8 @@
         pos->assign(arg_cstr);
         assert(idx < m_argv.size() - 1);
         m_argv[idx] = pos->c_str();
+        if (idx >= m_args_quote_char.size())
+            m_args_quote_char.resize(idx + 1);
         m_args_quote_char[idx] = quote_char;
         return GetArgumentAtIndex(idx);
     }
@@ -444,7 +512,8 @@
         m_args.erase (pos);
         assert(idx < m_argv.size() - 1);
         m_argv.erase(m_argv.begin() + idx);
-        m_args_quote_char.erase(m_args_quote_char.begin() + idx);
+        if (idx < m_args_quote_char.size())
+            m_args_quote_char.erase(m_args_quote_char.begin() + idx);
     }
 }
 
@@ -462,7 +531,7 @@
     for (i=0; i<argc; ++i)
     {
         m_args.push_back (argv[i]);
-        if ((argv[i][0] == '"') || (argv[i][0] == '`'))
+        if ((argv[i][0] == '\'') || (argv[i][0] == '"') || (argv[i][0] == '`'))
             m_args_quote_char.push_back (argv[i][0]);
         else
             m_args_quote_char.push_back ('\0');





More information about the lldb-commits mailing list