[llvm] r359447 - FileCheck [2/12]: Stricter parsing of -D option

Thomas Preud'homme via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 29 06:32:36 PDT 2019


Author: thopre
Date: Mon Apr 29 06:32:36 2019
New Revision: 359447

URL: http://llvm.org/viewvc/llvm-project?rev=359447&view=rev
Log:
FileCheck [2/12]: Stricter parsing of -D option

Summary:
This patch is part of a patch series to add support for FileCheck
numeric expressions. This specific patch gives earlier and better
diagnostics for the -D option.

Prior to this change, parsing of -D option was very loose: it assumed
that there is an equal sign (which to be fair is now checked by the
FileCheck executable) and that the part on the left of the equal sign
was a valid variable name. This commit adds logic to ensure that this
is the case and gives diagnostic when it is not, making it clear that
the issue came from a command-line option error. This is achieved by
sharing the variable parsing code into a new function ParseVariable.

Copyright:
    - Linaro (changes up to diff 183612 of revision D55940)
    - GraphCore (changes in later versions of revision D55940 and
                 in new revision created off D55940)

Reviewers: jhenderson, chandlerc, jdenny, probinson, grimar, arichardson, rnk

Subscribers: hiraditya, llvm-commits, probinson, dblaikie, grimar, arichardson, tra, rnk, kristina, hfinkel, rogfer01, JonChesterfield

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60382

Modified:
    llvm/trunk/include/llvm/Support/FileCheck.h
    llvm/trunk/lib/Support/FileCheck.cpp
    llvm/trunk/test/FileCheck/defines.txt
    llvm/trunk/unittests/Support/FileCheckTest.cpp

Modified: llvm/trunk/include/llvm/Support/FileCheck.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/FileCheck.h?rev=359447&r1=359446&r2=359447&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/FileCheck.h (original)
+++ llvm/trunk/include/llvm/Support/FileCheck.h Mon Apr 29 06:32:36 2019
@@ -102,8 +102,10 @@ public:
   llvm::Optional<StringRef> getVarValue(StringRef VarName);
 
   /// Define variables from definitions given on the command line passed as a
-  /// vector of VAR=VAL strings in \p CmdlineDefines.
-  void defineCmdlineVariables(std::vector<std::string> &CmdlineDefines);
+  /// vector of VAR=VAL strings in \p CmdlineDefines. Report any error to \p SM
+  /// and return whether an error occured.
+  bool defineCmdlineVariables(std::vector<std::string> &CmdlineDefines,
+                              SourceMgr &SM);
 
   /// Undefine local variables (variables whose name does not start with a '$'
   /// sign), i.e. remove them from GlobalVariableTable.
@@ -153,6 +155,13 @@ public:
   /// Returns the pointer to the global state for all patterns in this
   /// FileCheck instance.
   FileCheckPatternContext *getContext() const { return Context; }
+  /// Return whether \p is a valid first character for a variable name.
+  static bool isValidVarNameStart(char C);
+  /// Verify that the string at the start of \p Str is a well formed variable.
+  /// Return false if it is and set \p IsPseudo to indicate if it is a pseudo
+  /// variable and \p TrailIdx to the position of the last character that is
+  /// part of the variable name. Otherwise, only return true.
+  static bool parseVariable(StringRef Str, bool &IsPseudo, unsigned &TrailIdx);
   bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
                     unsigned LineNumber, const FileCheckRequest &Req);
   size_t match(StringRef Buffer, size_t &MatchLen) const;

Modified: llvm/trunk/lib/Support/FileCheck.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/FileCheck.cpp?rev=359447&r1=359446&r2=359447&view=diff
==============================================================================
--- llvm/trunk/lib/Support/FileCheck.cpp (original)
+++ llvm/trunk/lib/Support/FileCheck.cpp Mon Apr 29 06:32:36 2019
@@ -24,6 +24,37 @@
 
 using namespace llvm;
 
+bool FileCheckPattern::isValidVarNameStart(char C) {
+  return C == '_' || isalpha(C);
+}
+
+bool FileCheckPattern::parseVariable(StringRef Str, bool &IsPseudo,
+                                     unsigned &TrailIdx) {
+  if (Str.empty())
+    return true;
+
+  bool ParsedOneChar = false;
+  unsigned I = 0;
+  IsPseudo = Str[0] == '@';
+
+  // Global vars start with '$'.
+  if (Str[0] == '$' || IsPseudo)
+    ++I;
+
+  for (unsigned E = Str.size(); I != E; ++I) {
+    if (!ParsedOneChar && !isValidVarNameStart(Str[I]))
+      return true;
+
+    // Variable names are composed of alphanumeric characters and underscores.
+    if (Str[I] != '_' && !isalnum(Str[I]))
+      break;
+    ParsedOneChar = true;
+  }
+
+  TrailIdx = I;
+  return false;
+}
+
 /// Parses the given string into the Pattern.
 ///
 /// \p Prefix provides which prefix is being matched, \p SM provides the
@@ -117,9 +148,10 @@ bool FileCheckPattern::ParsePattern(Stri
     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
     // it.  This is to catch some common errors.
     if (PatternStr.startswith("[[")) {
+      StringRef MatchStr = PatternStr.substr(2);
       // Find the closing bracket pair ending the match.  End is going to be an
       // offset relative to the beginning of the match string.
-      size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
+      size_t End = FindRegexVarEnd(MatchStr, SM);
 
       if (End == StringRef::npos) {
         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
@@ -128,55 +160,44 @@ bool FileCheckPattern::ParsePattern(Stri
         return true;
       }
 
-      StringRef MatchStr = PatternStr.substr(2, End);
+      MatchStr = MatchStr.substr(0, End);
       PatternStr = PatternStr.substr(End + 4);
 
-      // Get the regex name (e.g. "foo").
-      size_t NameEnd = MatchStr.find(':');
-      StringRef Name = MatchStr.substr(0, NameEnd);
-
-      if (Name.empty()) {
-        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
-                        "invalid name in named regex: empty name");
+      // Get the regex name (e.g. "foo") and verify it is well formed.
+      bool IsPseudo;
+      unsigned TrailIdx;
+      if (parseVariable(MatchStr, IsPseudo, TrailIdx)) {
+        SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data()),
+                        SourceMgr::DK_Error, "invalid name in named regex");
+        return true;
+      }
+
+      StringRef Name = MatchStr.substr(0, TrailIdx);
+      StringRef Trailer = MatchStr.substr(TrailIdx);
+      bool IsVarDef = (Trailer.find(":") != StringRef::npos);
+
+      if (IsVarDef && (IsPseudo || !Trailer.consume_front(":"))) {
+        SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data()),
+                        SourceMgr::DK_Error,
+                        "invalid name in named regex definition");
         return true;
       }
 
       // Verify that the name/expression is well formed. FileCheck currently
       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
-      // is relaxed, more strict check is performed in \c EvaluateExpression.
-      bool IsExpression = false;
-      for (unsigned i = 0, e = Name.size(); i != e; ++i) {
-        if (i == 0) {
-          if (Name[i] == '$')  // Global vars start with '$'
-            continue;
-          if (Name[i] == '@') {
-            if (NameEnd != StringRef::npos) {
-              SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
-                              SourceMgr::DK_Error,
-                              "invalid name in named regex definition");
-              return true;
-            }
-            IsExpression = true;
-            continue;
+      // is relaxed. A stricter check is performed in \c EvaluateExpression.
+      if (IsPseudo) {
+        for (unsigned I = 0, E = Trailer.size(); I != E; ++I) {
+          if (!isalnum(Trailer[I]) && Trailer[I] != '+' && Trailer[I] != '-') {
+            SM.PrintMessage(SMLoc::getFromPointer(Name.data() + I),
+                            SourceMgr::DK_Error, "invalid name in named regex");
+            return true;
           }
         }
-        if (Name[i] != '_' && !isalnum(Name[i]) &&
-            (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
-          SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
-                          SourceMgr::DK_Error, "invalid name in named regex");
-          return true;
-        }
-      }
-
-      // Name can't start with a digit.
-      if (isdigit(static_cast<unsigned char>(Name[0]))) {
-        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
-                        "invalid name in named regex");
-        return true;
       }
 
       // Handle [[foo]].
-      if (NameEnd == StringRef::npos) {
+      if (!IsVarDef) {
         // Handle variables that were defined earlier on the same line by
         // emitting a backreference.
         if (VariableDefs.find(Name) != VariableDefs.end()) {
@@ -189,7 +210,7 @@ bool FileCheckPattern::ParsePattern(Stri
           }
           AddBackrefToRegEx(VarParenNum);
         } else {
-          VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+          VariableUses.push_back(std::make_pair(MatchStr, RegExStr.size()));
         }
         continue;
       }
@@ -199,7 +220,7 @@ bool FileCheckPattern::ParsePattern(Stri
       RegExStr += '(';
       ++CurParen;
 
-      if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
+      if (AddRegExToRegEx(Trailer, CurParen, SM))
         return true;
 
       RegExStr += ')';
@@ -755,7 +776,8 @@ FindFirstMatchingPrefix(Regex &PrefixRE,
 bool llvm::FileCheck::ReadCheckFile(
     SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
     std::vector<FileCheckString> &CheckStrings) {
-  PatternContext.defineCmdlineVariables(Req.GlobalDefines);
+  if (PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM))
+    return true;
 
   std::vector<FileCheckPattern> ImplicitNegativeChecks;
   for (const auto &PatternString : Req.ImplicitCheckNot) {
@@ -1374,12 +1396,59 @@ Regex llvm::FileCheck::buildCheckPrefixR
   return Regex(PrefixRegexStr);
 }
 
-void FileCheckPatternContext::defineCmdlineVariables(
-    std::vector<std::string> &CmdlineDefines) {
+bool FileCheckPatternContext::defineCmdlineVariables(
+    std::vector<std::string> &CmdlineDefines, SourceMgr &SM) {
   assert(GlobalVariableTable.empty() &&
          "Overriding defined variable with command-line variable definitions");
+
+  if (CmdlineDefines.empty())
+    return false;
+
+  // Create a string representing the vector of command-line definitions. Each
+  // definition is on its own line and prefixed with a definition number to
+  // clarify which definition a given diagnostic corresponds to.
+  unsigned I = 0;
+  bool ErrorFound = false;
+  std::string CmdlineDefsDiag;
+  StringRef Prefix1 = "Global define #";
+  StringRef Prefix2 = ": ";
   for (StringRef CmdlineDef : CmdlineDefines)
-    GlobalVariableTable.insert(CmdlineDef.split('='));
+    CmdlineDefsDiag +=
+        (Prefix1 + Twine(++I) + Prefix2 + CmdlineDef + "\n").str();
+
+  std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
+      MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
+  StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
+  SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
+
+  SmallVector<StringRef, 4> CmdlineDefsDiagVec;
+  CmdlineDefsDiagRef.split(CmdlineDefsDiagVec, '\n', -1 /*MaxSplit*/,
+                           false /*KeepEmpty*/);
+  for (StringRef CmdlineDefDiag : CmdlineDefsDiagVec) {
+    unsigned NameStart = CmdlineDefDiag.find(Prefix2) + Prefix2.size();
+    if (CmdlineDefDiag.substr(NameStart).find('=') == StringRef::npos) {
+      SM.PrintMessage(SMLoc::getFromPointer(CmdlineDefDiag.data()),
+                      SourceMgr::DK_Error,
+                      "Missing equal sign in global definition");
+      ErrorFound = true;
+      continue;
+    }
+    std::pair<StringRef, StringRef> CmdlineNameVal =
+        CmdlineDefDiag.substr(NameStart).split('=');
+    StringRef Name = CmdlineNameVal.first;
+    bool IsPseudo;
+    unsigned TrailIdx;
+    if (FileCheckPattern::parseVariable(Name, IsPseudo, TrailIdx) || IsPseudo ||
+        TrailIdx != Name.size() || Name.empty()) {
+      SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+                      "invalid name for variable definition '" + Name + "'");
+      ErrorFound = true;
+      continue;
+    }
+    GlobalVariableTable.insert(CmdlineNameVal);
+  }
+
+  return ErrorFound;
 }
 
 void FileCheckPatternContext::clearLocalVars() {

Modified: llvm/trunk/test/FileCheck/defines.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FileCheck/defines.txt?rev=359447&r1=359446&r2=359447&view=diff
==============================================================================
--- llvm/trunk/test/FileCheck/defines.txt (original)
+++ llvm/trunk/test/FileCheck/defines.txt Mon Apr 29 06:32:36 2019
@@ -1,6 +1,5 @@
 ; RUN: FileCheck -DVALUE=10 -input-file %s %s
 ; RUN: not FileCheck -DVALUE=20 -input-file %s %s 2>&1 | FileCheck %s -check-prefix ERRMSG
-;
 ; RUN: not FileCheck -DVALUE=10 -check-prefix NOT -input-file %s %s 2>&1 | FileCheck %s -check-prefix NOT-ERRMSG
 ; RUN: FileCheck -DVALUE=20 -check-prefix NOT -input-file %s %s
 ; RUN: not FileCheck -DVALUE10 -input-file %s %s 2>&1 | FileCheck %s -check-prefix ERRCLIEQ1
@@ -9,6 +8,9 @@
 ; RUN: not FileCheck -D= -input-file %s %s 2>&1 | FileCheck %s -check-prefix ERRCLIVAR2
 ; RUN: FileCheck -DVALUE= -check-prefix EMPTY -input-file %s %s 2>&1
 
+; RUN: not FileCheck -D10VALUE=10 -input-file %s %s 2>&1 | FileCheck %s --strict-whitespace -check-prefix ERRCLIFMT
+; RUN: not FileCheck -D at VALUE=10 -input-file %s %s 2>&1 | FileCheck %s --strict-whitespace -check-prefix ERRCLIPSEUDO
+; RUN: not FileCheck -D'VALUE + 2=10' -input-file %s %s 2>&1 | FileCheck %s --strict-whitespace -check-prefix ERRCLITRAIL
 Value = 10
 ; CHECK: Value = [[VALUE]]
 ; NOT-NOT: Value = [[VALUE]]
@@ -32,3 +34,15 @@ Value = 10
 
 Empty value = @@
 ; EMPTY: Empty value = @[[VALUE]]@
+
+; ERRCLIFMT: Global defines:1:19: error: invalid name for variable definition '10VALUE'
+; ERRCLIFMT-NEXT: Global define #1: 10VALUE=10
+; ERRCLIFMT-NEXT: {{^                  \^$}}
+
+; ERRCLIPSEUDO: Global defines:1:19: error: invalid name for variable definition '@VALUE'
+; ERRCLIPSEUDO-NEXT: Global define #1: @VALUE=10
+; ERRCLIPSEUDO-NEXT: {{^                  \^$}}
+
+; ERRCLITRAIL: Global defines:1:19: error: invalid name for variable definition 'VALUE + 2'
+; ERRCLITRAIL-NEXT: Global define #1: VALUE + 2=10
+; ERRCLITRAIL-NEXT: {{^                  \^$}}

Modified: llvm/trunk/unittests/Support/FileCheckTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/FileCheckTest.cpp?rev=359447&r1=359446&r2=359447&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/FileCheckTest.cpp (original)
+++ llvm/trunk/unittests/Support/FileCheckTest.cpp Mon Apr 29 06:32:36 2019
@@ -14,31 +14,132 @@ namespace {
 
 class FileCheckTest : public ::testing::Test {};
 
+TEST_F(FileCheckTest, ValidVarNameStart) {
+  EXPECT_TRUE(FileCheckPattern::isValidVarNameStart('a'));
+  EXPECT_TRUE(FileCheckPattern::isValidVarNameStart('G'));
+  EXPECT_TRUE(FileCheckPattern::isValidVarNameStart('_'));
+  EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('2'));
+  EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('$'));
+  EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('@'));
+  EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('+'));
+  EXPECT_FALSE(FileCheckPattern::isValidVarNameStart('-'));
+  EXPECT_FALSE(FileCheckPattern::isValidVarNameStart(':'));
+}
+
+TEST_F(FileCheckTest, ParseVar) {
+  StringRef VarName = "GoodVar42";
+  bool IsPseudo = true;
+  unsigned TrailIdx = 0;
+  EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(TrailIdx, VarName.size());
+
+  VarName = "$GoodGlobalVar";
+  IsPseudo = true;
+  TrailIdx = 0;
+  EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(TrailIdx, VarName.size());
+
+  VarName = "@GoodPseudoVar";
+  IsPseudo = true;
+  TrailIdx = 0;
+  EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+  EXPECT_TRUE(IsPseudo);
+  EXPECT_EQ(TrailIdx, VarName.size());
+
+  VarName = "42BadVar";
+  EXPECT_TRUE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+
+  VarName = "$@";
+  EXPECT_TRUE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+
+  VarName = "B at dVar";
+  IsPseudo = true;
+  TrailIdx = 0;
+  EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(TrailIdx, 1U);
+
+  VarName = "B$dVar";
+  IsPseudo = true;
+  TrailIdx = 0;
+  EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(TrailIdx, 1U);
+
+  VarName = "BadVar+";
+  IsPseudo = true;
+  TrailIdx = 0;
+  EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(TrailIdx, VarName.size() - 1);
+
+  VarName = "BadVar-";
+  IsPseudo = true;
+  TrailIdx = 0;
+  EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(TrailIdx, VarName.size() - 1);
+
+  VarName = "BadVar:";
+  IsPseudo = true;
+  TrailIdx = 0;
+  EXPECT_FALSE(FileCheckPattern::parseVariable(VarName, IsPseudo, TrailIdx));
+  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(TrailIdx, VarName.size() - 1);
+}
+
 TEST_F(FileCheckTest, FileCheckContext) {
-  FileCheckPatternContext Cxt;
+  FileCheckPatternContext Cxt = FileCheckPatternContext();
   std::vector<std::string> GlobalDefines;
+  SourceMgr SM;
+
+  // Missing equal sign
+  GlobalDefines.emplace_back(std::string("LocalVar"));
+  EXPECT_TRUE(Cxt.defineCmdlineVariables(GlobalDefines, SM));
+
+  // Empty variable
+  GlobalDefines.clear();
+  GlobalDefines.emplace_back(std::string("=18"));
+  EXPECT_TRUE(Cxt.defineCmdlineVariables(GlobalDefines, SM));
+
+  // Invalid variable
+  GlobalDefines.clear();
+  GlobalDefines.emplace_back(std::string("18LocalVar=18"));
+  EXPECT_TRUE(Cxt.defineCmdlineVariables(GlobalDefines, SM));
 
-  // Define local and global variables from command-line.
+  // Define local variables from command-line.
+  GlobalDefines.clear();
   GlobalDefines.emplace_back(std::string("LocalVar=FOO"));
-  Cxt.defineCmdlineVariables(GlobalDefines);
+  GlobalDefines.emplace_back(std::string("EmptyVar="));
+  bool GotError = Cxt.defineCmdlineVariables(GlobalDefines, SM);
+  EXPECT_FALSE(GotError);
 
   // Check defined variables are present and undefined is absent.
   StringRef LocalVarStr = "LocalVar";
+  StringRef EmptyVarStr = "EmptyVar";
   StringRef UnknownVarStr = "UnknownVar";
   llvm::Optional<StringRef> LocalVar = Cxt.getVarValue(LocalVarStr);
+  llvm::Optional<StringRef> EmptyVar = Cxt.getVarValue(EmptyVarStr);
   llvm::Optional<StringRef> UnknownVar = Cxt.getVarValue(UnknownVarStr);
   EXPECT_TRUE(LocalVar);
   EXPECT_EQ(*LocalVar, "FOO");
+  EXPECT_TRUE(EmptyVar);
+  EXPECT_EQ(*EmptyVar, "");
   EXPECT_FALSE(UnknownVar);
 
   // Clear local variables and check they become absent.
   Cxt.clearLocalVars();
   LocalVar = Cxt.getVarValue(LocalVarStr);
   EXPECT_FALSE(LocalVar);
+  EmptyVar = Cxt.getVarValue(EmptyVarStr);
+  EXPECT_FALSE(EmptyVar);
 
   // Redefine global variables and check variables are defined again.
   GlobalDefines.emplace_back(std::string("$GlobalVar=BAR"));
-  Cxt.defineCmdlineVariables(GlobalDefines);
+  GotError = Cxt.defineCmdlineVariables(GlobalDefines, SM);
+  EXPECT_FALSE(GotError);
   StringRef GlobalVarStr = "$GlobalVar";
   llvm::Optional<StringRef> GlobalVar = Cxt.getVarValue(GlobalVarStr);
   EXPECT_TRUE(GlobalVar);




More information about the llvm-commits mailing list