[llvm] f8a29b1 - [OptTable] Support grouped short options

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 17 09:32:50 PDT 2020


Author: Fangrui Song
Date: 2020-07-17T09:32:43-07:00
New Revision: f8a29b174a965acb942dd3ef5f8ef2c32620777b

URL: https://github.com/llvm/llvm-project/commit/f8a29b174a965acb942dd3ef5f8ef2c32620777b
DIFF: https://github.com/llvm/llvm-project/commit/f8a29b174a965acb942dd3ef5f8ef2c32620777b.diff

LOG: [OptTable] Support grouped short options

POSIX.1-2017 12.2 Utility Syntax Guidelines, Guideline 5 says:

> One or more options without option-arguments, followed by at most one option that takes an option-argument, should be accepted when grouped behind one '-' delimiter.

i.e. -abc represents -a -b -c. The grouped short options are very common.  Many
utilities extend the syntax by allowing (an option with an argument) following a
sequence of short options.

This patch adds the support to OptTable, similar to cl::Group for CommandLine
(D58711).  llvm-symbolizer will use the feature (D83530). CommandLine is exotic
in some aspects. OptTable is preferred if the user wants to get rid of the
behaviors.

* `cl::opt<bool> i(...)` can be disabled via -i=false or -i=0, which is
  different from conventional --no-i.
* Handling --foo & --no-foo requires a comparison of argument positions,
  which is a bit clumsy in user code.

OptTable::parseOneArg (non-const reference InputArgList) is added along with
ParseOneArg (const ArgList &). The duplicate does not look great at first
glance. However, The implementation can be simpler if ArgList is mutable.
(ParseOneArg is used by clang-cl (FlagsToInclude/FlagsToExclude) and lld COFF
(case-insensitive). Adding grouped short options can make the function even more
complex.)

The implementation allows a long option following a group of short options. We
probably should refine the code to disallow this in the future. Allowing this
seems benign for now.

Reviewed By: grimar, jhenderson

Differential Revision: https://reviews.llvm.org/D83639

Added: 
    

Modified: 
    llvm/include/llvm/Option/ArgList.h
    llvm/include/llvm/Option/OptTable.h
    llvm/include/llvm/Option/Option.h
    llvm/lib/Option/OptTable.cpp
    llvm/lib/Option/Option.cpp
    llvm/unittests/Option/OptionParsingTest.cpp
    llvm/unittests/Option/Opts.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h
index 74bfadcba726..9ce783978185 100644
--- a/llvm/include/llvm/Option/ArgList.h
+++ b/llvm/include/llvm/Option/ArgList.h
@@ -412,6 +412,10 @@ class InputArgList final : public ArgList {
     return ArgStrings[Index];
   }
 
+  void replaceArgString(unsigned Index, const Twine &S) {
+    ArgStrings[Index] = MakeArgString(S);
+  }
+
   unsigned getNumInputArgStrings() const override {
     return NumInputArgStrings;
   }

diff  --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h
index 5db30436069d..b9984bed55a7 100644
--- a/llvm/include/llvm/Option/OptTable.h
+++ b/llvm/include/llvm/Option/OptTable.h
@@ -59,6 +59,7 @@ class OptTable {
   /// The option information table.
   std::vector<Info> OptionInfos;
   bool IgnoreCase;
+  bool GroupedShortOptions = false;
 
   unsigned TheInputOptionID = 0;
   unsigned TheUnknownOptionID = 0;
@@ -79,6 +80,8 @@ class OptTable {
     return OptionInfos[id - 1];
   }
 
+  Arg *parseOneArgGrouped(InputArgList &Args, unsigned &Index) const;
+
 protected:
   OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false);
 
@@ -120,6 +123,9 @@ class OptTable {
     return getInfo(id).MetaVar;
   }
 
+  /// Support grouped short options. e.g. -ab represents -a -b.
+  void setGroupedShortOptions(bool Value) { GroupedShortOptions = Value; }
+
   /// Find possible value for given flags. This is used for shell
   /// autocompletion.
   ///

diff  --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h
index 73ee8e0073b8..196cf656355d 100644
--- a/llvm/include/llvm/Option/Option.h
+++ b/llvm/include/llvm/Option/Option.h
@@ -213,14 +213,16 @@ class Option {
   /// Index to the position where argument parsing should resume
   /// (even if the argument is missing values).
   ///
-  /// \param ArgSize The number of bytes taken up by the matched Option prefix
-  ///                and name. This is used to determine where joined values
-  ///                start.
-  Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const;
+  /// \p CurArg The argument to be matched. It may be shorter than the
+  /// underlying storage to represent a Joined argument.
+  /// \p GroupedShortOption If true, we are handling the fallback case of
+  /// parsing a prefix of the current argument as a short option.
+  Arg *accept(const ArgList &Args, StringRef CurArg, bool GroupedShortOption,
+              unsigned &Index) const;
 
 private:
-  Arg *acceptInternal(const ArgList &Args, unsigned &Index,
-                      unsigned ArgSize) const;
+  Arg *acceptInternal(const ArgList &Args, StringRef CurArg,
+                      unsigned &Index) const;
 
 public:
   void print(raw_ostream &O) const;

diff  --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index 926eb8e0437f..16404d3d8107 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -330,6 +330,60 @@ bool OptTable::addValues(const char *Option, const char *Values) {
   return false;
 }
 
+// Parse a single argument, return the new argument, and update Index. If
+// GroupedShortOptions is true, -a matches "-abc" and the argument in Args will
+// be updated to "-bc". This overload does not support
+// FlagsToInclude/FlagsToExclude or case insensitive options.
+Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const {
+  // Anything that doesn't start with PrefixesUnion is an input, as is '-'
+  // itself.
+  const char *CStr = Args.getArgString(Index);
+  StringRef Str(CStr);
+  if (isInput(PrefixesUnion, Str))
+    return new Arg(getOption(TheInputOptionID), Str, Index++, CStr);
+
+  const Info *End = OptionInfos.data() + OptionInfos.size();
+  StringRef Name = Str.ltrim(PrefixChars);
+  const Info *Start = std::lower_bound(
+      OptionInfos.data() + FirstSearchableIndex, End, Name.data());
+  const Info *Fallback = nullptr;
+  unsigned Prev = Index;
+
+  // Search for the option which matches Str.
+  for (; Start != End; ++Start) {
+    unsigned ArgSize = matchOption(Start, Str, IgnoreCase);
+    if (!ArgSize)
+      continue;
+
+    Option Opt(Start, this);
+    if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
+                            false, Index))
+      return A;
+
+    // If Opt is a Flag of length 2 (e.g. "-a"), we know it is a prefix of
+    // the current argument (e.g. "-abc"). Match it as a fallback if no longer
+    // option (e.g. "-ab") exists.
+    if (ArgSize == 2 && Opt.getKind() == Option::FlagClass)
+      Fallback = Start;
+
+    // Otherwise, see if the argument is missing.
+    if (Prev != Index)
+      return nullptr;
+  }
+  if (Fallback) {
+    Option Opt(Fallback, this);
+    if (Arg *A = Opt.accept(Args, Str.substr(0, 2), true, Index)) {
+      if (Str.size() == 2)
+        ++Index;
+      else
+        Args.replaceArgString(Index, Twine('-') + Str.substr(2));
+      return A;
+    }
+  }
+
+  return new Arg(getOption(TheUnknownOptionID), Str, Index++, CStr);
+}
+
 Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
                            unsigned FlagsToInclude,
                            unsigned FlagsToExclude) const {
@@ -373,7 +427,8 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
       continue;
 
     // See if this option matches.
-    if (Arg *A = Opt.accept(Args, Index, ArgSize))
+    if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
+                            false, Index))
       return A;
 
     // Otherwise, see if this argument was missing values.
@@ -414,8 +469,11 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
     }
 
     unsigned Prev = Index;
-    Arg *A = ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude);
-    assert(Index > Prev && "Parser failed to consume argument.");
+    Arg *A = GroupedShortOptions
+                 ? parseOneArgGrouped(Args, Index)
+                 : ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude);
+    assert((Index > Prev || GroupedShortOptions) &&
+           "Parser failed to consume argument.");
 
     // Check for missing argument error.
     if (!A) {

diff  --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp
index 9abc9fdce4c7..68d074b2702e 100644
--- a/llvm/lib/Option/Option.cpp
+++ b/llvm/lib/Option/Option.cpp
@@ -106,9 +106,9 @@ bool Option::matches(OptSpecifier Opt) const {
   return false;
 }
 
-Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index,
-                            unsigned ArgSize) const {
-  StringRef Spelling = StringRef(Args.getArgString(Index), ArgSize);
+Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
+                            unsigned &Index) const {
+  size_t ArgSize = Spelling.size();
   switch (getKind()) {
   case FlagClass: {
     if (ArgSize != strlen(Args.getArgString(Index)))
@@ -230,10 +230,11 @@ Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index,
   }
 }
 
-Arg *Option::accept(const ArgList &Args,
-                    unsigned &Index,
-                    unsigned ArgSize) const {
-  std::unique_ptr<Arg> A(acceptInternal(Args, Index, ArgSize));
+Arg *Option::accept(const ArgList &Args, StringRef CurArg,
+                    bool GroupedShortOption, unsigned &Index) const {
+  std::unique_ptr<Arg> A(GroupedShortOption && getKind() == FlagClass
+                             ? new Arg(*this, CurArg, Index)
+                             : acceptInternal(Args, CurArg, Index));
   if (!A)
     return nullptr;
 

diff  --git a/llvm/unittests/Option/OptionParsingTest.cpp b/llvm/unittests/Option/OptionParsingTest.cpp
index e1d7a473ee7f..feeb3b7866cd 100644
--- a/llvm/unittests/Option/OptionParsingTest.cpp
+++ b/llvm/unittests/Option/OptionParsingTest.cpp
@@ -332,3 +332,47 @@ TEST(DISABLED_Option, FindNearestFIXME) {
   EXPECT_EQ(Nearest, "--ermghFoo");
 
 }
+
+TEST(Option, ParseGroupedShortOptions) {
+  TestOptTable T;
+  T.setGroupedShortOptions(true);
+  unsigned MAI, MAC;
+
+  // Grouped short options can be followed by a long Flag (-Joo), or a non-Flag
+  // option (-C=1).
+  const char *Args1[] = {"-AIJ", "-AIJoo", "-AC=1"};
+  InputArgList AL = T.ParseArgs(Args1, MAI, MAC);
+  EXPECT_TRUE(AL.hasArg(OPT_A));
+  EXPECT_TRUE(AL.hasArg(OPT_H));
+  ASSERT_EQ((size_t)2, AL.getAllArgValues(OPT_B).size());
+  EXPECT_EQ("foo", AL.getAllArgValues(OPT_B)[0]);
+  EXPECT_EQ("bar", AL.getAllArgValues(OPT_B)[1]);
+  ASSERT_TRUE(AL.hasArg(OPT_C));
+  EXPECT_EQ("1", AL.getAllArgValues(OPT_C)[0]);
+
+  // Prefer a long option to a short option.
+  const char *Args2[] = {"-AB"};
+  InputArgList AL2 = T.ParseArgs(Args2, MAI, MAC);
+  EXPECT_TRUE(!AL2.hasArg(OPT_A));
+  EXPECT_TRUE(AL2.hasArg(OPT_AB));
+
+  // Short options followed by a long option. We probably should disallow this.
+  const char *Args3[] = {"-AIblorp"};
+  InputArgList AL3 = T.ParseArgs(Args3, MAI, MAC);
+  EXPECT_TRUE(AL3.hasArg(OPT_A));
+  EXPECT_TRUE(AL3.hasArg(OPT_Blorp));
+}
+
+TEST(Option, UnknownOptions) {
+  TestOptTable T;
+  unsigned MAI, MAC;
+  const char *Args[] = {"-u", "--long", "0"};
+  for (int I = 0; I < 2; ++I) {
+    T.setGroupedShortOptions(I != 0);
+    InputArgList AL = T.ParseArgs(Args, MAI, MAC);
+    const std::vector<std::string> Unknown = AL.getAllArgValues(OPT_UNKNOWN);
+    ASSERT_EQ((size_t)2, Unknown.size());
+    EXPECT_EQ("-u", Unknown[0]);
+    EXPECT_EQ("--long", Unknown[1]);
+  }
+}

diff  --git a/llvm/unittests/Option/Opts.td b/llvm/unittests/Option/Opts.td
index 70920a6a76b4..e1ebffd1881f 100644
--- a/llvm/unittests/Option/Opts.td
+++ b/llvm/unittests/Option/Opts.td
@@ -5,6 +5,7 @@ def OptFlag2 : OptionFlag;
 def OptFlag3 : OptionFlag;
 
 def A : Flag<["-"], "A">, HelpText<"The A option">, Flags<[OptFlag1]>;
+def AB : Flag<["-"], "AB">;
 def B : Joined<["-"], "B">, HelpText<"The B option">, MetaVarName<"B">, Flags<[OptFlag2]>;
 def C : Separate<["-"], "C">, HelpText<"The C option">, MetaVarName<"C">, Flags<[OptFlag1]>;
 def SLASH_C : Separate<["/", "-"], "C">, HelpText<"The C option">, MetaVarName<"C">, Flags<[OptFlag3]>;


        


More information about the llvm-commits mailing list