[Lldb-commits] [lldb] r122166 - in /lldb/trunk: include/lldb/Core/RegularExpression.h source/Core/RegularExpression.cpp source/Core/UserSettingsController.cpp source/Interpreter/Args.cpp
Greg Clayton
gclayton at apple.com
Sat Dec 18 19:41:24 PST 2010
Author: gclayton
Date: Sat Dec 18 21:41:24 2010
New Revision: 122166
URL: http://llvm.org/viewvc/llvm-project?rev=122166&view=rev
Log:
Improved our argument parsing abilities to be able to handle stuff more like
a shell would interpret it. A few examples that we now handle correctly:
INPUT: "Hello "World
OUTPUT: "Hello World"
INPUT: "Hello "' World'
OUTPUT: "Hello World"
INPUT: Hello" World"
OUTPUT: "Hello World"
This broke the setting of dictionary values for the "settings set" command
for things like:
(lldb) settings set target.process.env-vars ["MY_ENV_VAR"]=YES
since we would drop the quotes. I fixed the user settings controller to use
a regular expression so it can accept any of the following inputs for
dictionary setting:
settings set target.process.env-vars ["MY_ENV_VAR"]=YES
settings set target.process.env-vars [MY_ENV_VAR]=YES
settings set target.process.env-vars MY_ENV_VAR=YES
We might want to eventually drop the first two syntaxes, but I won't make
that decision right now.
This allows more natural setting of the environment variables:
settings set target.process.env-vars MY_ENV_VAR=YES ABC=DEF CWD=/tmp
Modified:
lldb/trunk/include/lldb/Core/RegularExpression.h
lldb/trunk/source/Core/RegularExpression.cpp
lldb/trunk/source/Core/UserSettingsController.cpp
lldb/trunk/source/Interpreter/Args.cpp
Modified: lldb/trunk/include/lldb/Core/RegularExpression.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/include/lldb/Core/RegularExpression.h?rev=122166&r1=122165&r2=122166&view=diff
==============================================================================
--- lldb/trunk/include/lldb/Core/RegularExpression.h (original)
+++ lldb/trunk/include/lldb/Core/RegularExpression.h Sat Dec 18 21:41:24 2010
@@ -101,10 +101,6 @@
/// @param[in] match_count
/// The number of regmatch_t objects in \a match_ptr
///
- /// @param[out] match_ptr
- /// A pointer to at least \a match_count regmatch_t objects
- /// if \a match_count is non-zero.
- ///
/// @param[in] execute_flags
/// Flags to pass to the \c regexec() function.
///
Modified: lldb/trunk/source/Core/RegularExpression.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Core/RegularExpression.cpp?rev=122166&r1=122165&r2=122166&view=diff
==============================================================================
--- lldb/trunk/source/Core/RegularExpression.cpp (original)
+++ lldb/trunk/source/Core/RegularExpression.cpp Sat Dec 18 21:41:24 2010
@@ -100,7 +100,7 @@
match_result = ::regexec (&m_preg,
s,
m_matches.size(),
- &m_matches.front(),
+ &m_matches[0],
execute_flags);
}
return match_result == 0;
@@ -111,9 +111,18 @@
{
if (idx <= m_preg.re_nsub && idx < m_matches.size())
{
- match_str.assign (s + m_matches[idx].rm_so,
- m_matches[idx].rm_eo - m_matches[idx].rm_so);
- return true;
+ if (m_matches[idx].rm_eo == m_matches[idx].rm_so)
+ {
+ // Matched the empty string...
+ match_str.clear();
+ return true;
+ }
+ else if (m_matches[idx].rm_eo > m_matches[idx].rm_so)
+ {
+ match_str.assign (s + m_matches[idx].rm_so,
+ m_matches[idx].rm_eo - m_matches[idx].rm_so);
+ return true;
+ }
}
return false;
}
Modified: lldb/trunk/source/Core/UserSettingsController.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Core/UserSettingsController.cpp?rev=122166&r1=122165&r2=122166&view=diff
==============================================================================
--- lldb/trunk/source/Core/UserSettingsController.cpp (original)
+++ lldb/trunk/source/Core/UserSettingsController.cpp Sat Dec 18 21:41:24 2010
@@ -12,6 +12,7 @@
#include "lldb/Core/UserSettingsController.h"
#include "lldb/Core/Error.h"
+#include "lldb/Core/RegularExpression.h"
#include "lldb/Core/Stream.h"
#include "lldb/Core/StreamString.h"
#include "lldb/Interpreter/CommandInterpreter.h"
@@ -2102,34 +2103,32 @@
}
Args args (new_value);
size_t num_args = args.GetArgumentCount();
+ RegularExpression regex("(\\[\"?)?" // Regex match 1 (optional key prefix of '["' pr '[')
+ "([A-Za-z_][A-Za-z_0-9]*)" // Regex match 2 (key string)
+ "(\"?\\])?" // Regex match 3 (optional key suffix of '"]' pr ']')
+ "=" // The equal sign that is required
+ "(.*)"); // Regex match 4 (value string)
+ std::string key, value;
+
for (size_t i = 0; i < num_args; ++i)
{
- std::string tmp_arg = args.GetArgumentAtIndex (i);
- size_t eq_sign = tmp_arg.find ('=');
- if (eq_sign != std::string::npos)
+ const char *key_equal_value_arg = args.GetArgumentAtIndex (i);
+ // Execute the regular expression on each arg.
+ if (regex.Execute(key_equal_value_arg, 5))
{
- if (eq_sign > 4)
- {
- std::string tmp_key = tmp_arg.substr (0, eq_sign);
- std::string real_value = tmp_arg.substr (eq_sign+1);
- if ((tmp_key[0] == '[')
- && (tmp_key[1] == '"')
- && (tmp_key[eq_sign-2] == '"')
- && (tmp_key[eq_sign-1] == ']'))
- {
- std::string real_key = tmp_key.substr (2, eq_sign-4);
- dictionary[real_key] = real_value;
- }
- else
- err.SetErrorString ("Invalid key format for dictionary assignment. "
- "Expected '[\"<key>\"]'\n");
- }
- else
- err.SetErrorString ("Invalid key format for dictionary assignment. "
- "Expected '[\"<key>\"]'\n");
+ // The regular expression succeeded. The match at index
+ // zero will be the entire string that matched the entire
+ // regular expression. The match at index 1 - 4 will be
+ // as mentioned above by the creation of the regex pattern.
+ // Match index 2 is the key, match index 4 is the value.
+ regex.GetMatchAtIndex (key_equal_value_arg, 2, key);
+ regex.GetMatchAtIndex (key_equal_value_arg, 4, value);
+ dictionary[key] = value;
}
else
- err.SetErrorString ("Invalid format for dictionary value. Expected '[\"<key>\"]=<value>'\n");
+ {
+ err.SetErrorString ("Invalid format for dictionary value. Expected one of '[\"<key>\"]=<value>', '[<key>]=<value>', or '<key>=<value>'\n");
+ }
}
}
break;
Modified: lldb/trunk/source/Interpreter/Args.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Interpreter/Args.cpp?rev=122166&r1=122165&r2=122166&view=diff
==============================================================================
--- lldb/trunk/source/Interpreter/Args.cpp (original)
+++ lldb/trunk/source/Interpreter/Args.cpp Sat Dec 18 21:41:24 2010
@@ -23,10 +23,6 @@
using namespace lldb;
using namespace lldb_private;
-static const char *k_space_characters = "\t\n\v\f\r ";
-static const char *k_space_characters_with_slash = "\t\n\v\f\r \\";
-
-
//----------------------------------------------------------------------
// Args constructor
//----------------------------------------------------------------------
@@ -34,7 +30,8 @@
m_args(),
m_argv()
{
- SetCommandString (command);
+ if (command)
+ SetCommandString (command);
}
@@ -42,11 +39,10 @@
m_args(),
m_argv()
{
- SetCommandString (command, len);
+ if (command && len)
+ SetCommandString (command, len);
}
-
-
//----------------------------------------------------------------------
// Destructor
//----------------------------------------------------------------------
@@ -97,20 +93,20 @@
Args::GetQuotedCommandString (std::string &command)
{
command.clear ();
- int argc = GetArgumentCount ();
- for (int i = 0; i < argc; ++i)
+ size_t argc = GetArgumentCount ();
+ for (size_t i = 0; i < argc; ++i)
{
if (i > 0)
- command += ' ';
- char quote_char = m_args_quote_char[i];
- if (quote_char != '\0')
- {
- command += quote_char;
- command += m_argv[i];
- command += quote_char;
+ command.append (1, ' ');
+ char quote_char = GetArgumentQuoteCharAtIndex(i);
+ if (quote_char)
+ {
+ command.append (1, quote_char);
+ command.append (m_argv[i]);
+ command.append (1, quote_char);
}
else
- command += m_argv[i];
+ command.append (m_argv[i]);
}
return argc > 0;
}
@@ -127,136 +123,197 @@
void
Args::SetCommandString (const char *command)
{
+ StreamFile s(stdout);
+ s.Printf("\nCOMMAND: %s\n", command);
m_args.clear();
m_argv.clear();
+ m_args_quote_char.clear();
+
if (command && command[0])
{
- const char *arg_start;
- const char *next_arg_start;
- for (arg_start = command, next_arg_start = NULL;
- arg_start && arg_start[0];
- arg_start = next_arg_start, next_arg_start = NULL)
+ static const char *k_space_separators = " \t";
+ static const char *k_space_separators_with_slash_and_quotes = " \t \\'\"`";
+ const char *arg_end = NULL;
+ const char *arg_pos;
+ for (arg_pos = command;
+ arg_pos && arg_pos[0];
+ arg_pos = arg_end)
{
- // Skip any leading space characters
- arg_start = ::strspn (arg_start, k_space_characters) + arg_start;
-
- // If there were only space characters to the end of the line, then
+ // Skip any leading space separators
+ const char *arg_start = ::strspn (arg_pos, k_space_separators) + arg_pos;
+
+ // If there were only space separators to the end of the line, then
// we're done.
if (*arg_start == '\0')
break;
+ // Arguments can be split into multiple discongituous pieces,
+ // for example:
+ // "Hello ""World"
+ // this would result in a single argument "Hello World" (without/
+ // the quotes) since the quotes would be removed and there is
+ // not space between the strings. So we need to keep track of the
+ // current start of each argument piece in "arg_piece_start"
+ const char *arg_piece_start = arg_start;
+ arg_pos = arg_piece_start;
+
std::string arg;
- const char *arg_end = NULL;
+ // Since we can have multiple quotes that form a single command
+ // in a command like: "Hello "world'!' (which will make a single
+ // argument "Hello world!") we remember the first quote character
+ // we encounter and use that for the quote character.
+ char first_quote_char = '\0';
+ char quote_char = '\0';
+ bool arg_complete = false;
- switch (*arg_start)
+ do
{
- case '\'':
- case '"':
- case '`':
+ arg_end = ::strcspn (arg_pos, k_space_separators_with_slash_and_quotes) + arg_pos;
+
+ switch (arg_end[0])
{
- // Look for either a quote character, or the backslash
- // character
- const char quote_char = *arg_start;
- char find_chars[3] = { quote_char, '\\' , '\0'};
- bool is_backtick = (quote_char == '`');
- if (quote_char == '"' || quote_char == '`')
- m_args_quote_char.push_back(quote_char);
- else
- m_args_quote_char.push_back('\0');
+ default:
+ assert (!"Unhandled case statement, we must handle this...");
+ break;
- while (*arg_start != '\0')
+ case '\0':
+ // End of C string
+ if (arg_piece_start && arg_piece_start[0])
+ arg.append (arg_piece_start);
+ arg_complete = true;
+ break;
+
+ case '\\':
+ // Backslash character
+ switch (arg_end[1])
{
- arg_end = ::strcspn (arg_start + 1, find_chars) + arg_start + 1;
+ case '\0':
+ arg.append (arg_piece_start);
+ arg_complete = true;
+ break;
- if (*arg_end == '\0')
- {
- arg.append (arg_start);
+ default:
+ arg_pos = arg_end + 2;
break;
+ }
+ break;
+
+ case '"':
+ case '\'':
+ case '`':
+ // Quote characters
+ if (quote_char)
+ {
+ // We found a quote character while inside a quoted
+ // character argument. If it matches our current quote
+ // character, this ends the effect of the quotes. If it
+ // doesn't we ignore it.
+ if (quote_char == arg_end[0])
+ {
+ arg.append (arg_piece_start, arg_end - arg_piece_start);
+ // Clear the quote character and let parsing
+ // continue (we need to watch for things like:
+ // "Hello ""World"
+ // "Hello "World
+ // "Hello "'World'
+ // All of which will result in a single argument "Hello World"
+ quote_char = '\0'; // Note that we are no longer inside quotes
+ arg_pos = arg_end + 1; // Skip the quote character
+ arg_piece_start = arg_pos; // Note we are starting from later in the string
}
-
- // Watch out for quote characters prefixed with '\'
- if (*arg_end == '\\')
+ else
{
- if (arg_end[1] == quote_char)
+ // different quote, skip it and keep going
+ arg_pos = arg_end + 1;
+ }
+ }
+ else
+ {
+ // We found the start of a quote scope.
+ // Make sure there isn't a string that predeces
+ // the start of a quote scope like:
+ // Hello" World"
+ // If so, then add the "Hello" to the arg
+ if (arg_end > arg_piece_start)
+ arg.append (arg_piece_start, arg_end - arg_piece_start);
+
+ // Enter into a quote scope
+ quote_char = arg_end[0];
+
+ if (first_quote_char == '\0')
+ first_quote_char = quote_char;
+
+ arg_pos = arg_end;
+
+ if (quote_char != '`')
+ ++arg_pos; // Skip the quote character if it is not a backtick
+
+ arg_piece_start = arg_pos; // Note we are starting from later in the string
+
+ // Skip till the next quote character
+ const char *end_quote = ::strchr (arg_piece_start, quote_char);
+ while (end_quote && end_quote[-1] == '\\')
+ {
+ // Don't skip the quote character if it is
+ // preceded by a '\' character
+ end_quote = ::strchr (end_quote + 1, quote_char);
+ }
+
+ if (end_quote)
+ {
+ if (end_quote > arg_piece_start)
{
- // The character following the '\' is our quote
- // character so strip the backslash character
- arg.append (arg_start, arg_end);
+ // Keep the backtick quote on commands
+ if (quote_char == '`')
+ arg.append (arg_piece_start, end_quote + 1 - arg_piece_start);
+ else
+ arg.append (arg_piece_start, end_quote - arg_piece_start);
+ }
+
+ // If the next character is a space or the end of
+ // string, this argument is complete...
+ if (end_quote[1] == ' ' || end_quote[1] == '\t' || end_quote[1] == '\0')
+ {
+ arg_complete = true;
+ arg_end = end_quote + 1;
}
else
{
- // The character following the '\' is NOT our
- // quote character, so include the backslash
- // and continue
- arg.append (arg_start, arg_end + 1);
+ arg_pos = end_quote + 1;
+ arg_piece_start = arg_pos;
}
- arg_start = arg_end + 1;
- continue;
- }
- else
- {
- arg.append (arg_start, arg_end + 1);
- next_arg_start = arg_end + 1;
- break;
+ quote_char = '\0';
}
}
+ break;
- // Skip single and double quotes, but leave backtick quotes
- if (!is_backtick)
+ case ' ':
+ case '\t':
+ if (quote_char)
{
- char first_c = arg[0];
- arg.erase(0,1);
- // Only erase the last character if it is the same as the first.
- // Otherwise, we're parsing an incomplete command line, and we
- // would be stripping off the last character of that string.
- if (arg[arg.size() - 1] == first_c)
- arg.erase(arg.size() - 1, 1);
+ // We are currently processing a quoted character and found
+ // a space character, skip any spaces and keep trying to find
+ // the end of the argument.
+ arg_pos = ::strspn (arg_end, k_space_separators) + arg_end;
}
- }
- break;
- default:
- {
- m_args_quote_char.push_back('\0');
- // Look for the next non-escaped space character
- while (*arg_start != '\0')
+ else
{
- arg_end = ::strcspn (arg_start, k_space_characters_with_slash) + arg_start;
-
- if (arg_end == NULL)
- {
- arg.append(arg_start);
- break;
- }
-
- if (*arg_end == '\\')
- {
- // Append up to the '\' char
- arg.append (arg_start, arg_end);
-
- if (arg_end[1] == '\0')
- break;
-
- // Append the character following the '\' if it isn't
- // the end of the string
- arg.append (1, arg_end[1]);
- arg_start = arg_end + 2;
- continue;
- }
- else
- {
- arg.append (arg_start, arg_end);
- next_arg_start = arg_end;
- break;
- }
+ // We are not inside any quotes, we just found a space after an
+ // argument
+ if (arg_end > arg_piece_start)
+ arg.append (arg_piece_start, arg_end - arg_piece_start);
+ arg_complete = true;
}
+ break;
}
- break;
- }
+ } while (!arg_complete);
m_args.push_back(arg);
+ m_args_quote_char.push_back (first_quote_char);
}
+ UpdateArgvFromArgs();
}
- UpdateArgvFromArgs();
+ Dump (&s);
}
void
@@ -309,6 +366,9 @@
for (pos = m_args.begin(); pos != end; ++pos)
m_argv.push_back(pos->c_str());
m_argv.push_back(NULL);
+ // Make sure we have enough arg quote chars in the array
+ if (m_args_quote_char.size() < m_args.size())
+ m_args_quote_char.resize (m_argv.size());
}
size_t
@@ -359,7 +419,8 @@
{
m_argv.erase(m_argv.begin());
m_args.pop_front();
- m_args_quote_char.erase(m_args_quote_char.begin());
+ if (!m_args_quote_char.empty())
+ m_args_quote_char.erase(m_args_quote_char.begin());
}
}
@@ -399,8 +460,13 @@
pos = m_args.insert(pos, arg_cstr);
-
- m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char);
+ if (idx >= m_args_quote_char.size())
+ {
+ m_args_quote_char.resize(idx + 1);
+ m_args_quote_char[idx] = quote_char;
+ }
+ else
+ m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char);
UpdateArgvFromArgs();
return GetArgumentAtIndex(idx);
@@ -422,6 +488,8 @@
pos->assign(arg_cstr);
assert(idx < m_argv.size() - 1);
m_argv[idx] = pos->c_str();
+ if (idx >= m_args_quote_char.size())
+ m_args_quote_char.resize(idx + 1);
m_args_quote_char[idx] = quote_char;
return GetArgumentAtIndex(idx);
}
@@ -444,7 +512,8 @@
m_args.erase (pos);
assert(idx < m_argv.size() - 1);
m_argv.erase(m_argv.begin() + idx);
- m_args_quote_char.erase(m_args_quote_char.begin() + idx);
+ if (idx < m_args_quote_char.size())
+ m_args_quote_char.erase(m_args_quote_char.begin() + idx);
}
}
@@ -462,7 +531,7 @@
for (i=0; i<argc; ++i)
{
m_args.push_back (argv[i]);
- if ((argv[i][0] == '"') || (argv[i][0] == '`'))
+ if ((argv[i][0] == '\'') || (argv[i][0] == '"') || (argv[i][0] == '`'))
m_args_quote_char.push_back (argv[i][0]);
else
m_args_quote_char.push_back ('\0');
More information about the lldb-commits
mailing list