[llvm] r289378 - Refactor FileCheck some to reduce memory allocation and copying. Also

Chandler Carruth via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 11 01:50:06 PST 2016


Author: chandlerc
Date: Sun Dec 11 03:50:05 2016
New Revision: 289378

URL: http://llvm.org/viewvc/llvm-project?rev=289378&view=rev
Log:
Refactor FileCheck some to reduce memory allocation and copying. Also
make some readability improvements.

Both the check file and input file have to be fully buffered to
normalize their whitespace. But previously this would be done in a stack
SmallString and then copied into a heap allocated MemoryBuffer. That
seems pretty wasteful, especially for something like FileCheck where
there are only ever two such entities.

This just rearranges the code so that we can keep the canonicalized
buffers on the stack of the main function, use reasonably large stack
buffers to reduce allocation. A rough estimate seems to show that about
80% of LLVM's .ll and .s files will fit into a 4k buffer, so this should
completely avoid heap allocation for the buffer in those cases. My
system's malloc is fast enough that the allocations don't directly show
up in timings. However, on some very slow test cases, this saves 1% - 2%
by avoiding the copy into the heap allocated buffer.

This also splits out the code which checks the input into a helper much
like the code to build the checks as that made the code much more
readable to me. Nit picks and suggestions welcome here. It has really
exposed a *bunch* of stuff that could be cleaned up though, so I'm
probably going to go and spring clean all of this code as I have more
changes coming to speed things up.

Modified:
    llvm/trunk/utils/FileCheck/FileCheck.cpp

Modified: llvm/trunk/utils/FileCheck/FileCheck.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/FileCheck/FileCheck.cpp?rev=289378&r1=289377&r2=289378&view=diff
==============================================================================
--- llvm/trunk/utils/FileCheck/FileCheck.cpp (original)
+++ llvm/trunk/utils/FileCheck/FileCheck.cpp Sun Dec 11 03:50:05 2016
@@ -654,18 +654,16 @@ struct CheckString {
                   StringMap<StringRef> &VariableTable) const;
 };
 
-/// Canonicalize whitespaces in the input file. Line endings are replaced
-/// with UNIX-style '\n'.
+/// Canonicalize whitespaces in the file. Line endings are replaced with
+/// UNIX-style '\n'.
 ///
 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
 /// characters to a single space.
-static std::unique_ptr<MemoryBuffer>
-CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
-                      bool PreserveHorizontal) {
-  SmallString<128> NewFile;
-  NewFile.reserve(MB->getBufferSize());
+static StringRef CanonicalizeFile(MemoryBuffer &MB, bool PreserveHorizontal,
+                                  SmallVectorImpl<char> &OutputBuffer) {
+  OutputBuffer.reserve(MB.getBufferSize());
 
-  for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
+  for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
        Ptr != End; ++Ptr) {
     // Eliminate trailing dosish \r.
     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
@@ -675,19 +673,20 @@ CanonicalizeInputFile(std::unique_ptr<Me
     // If current char is not a horizontal whitespace or if horizontal
     // whitespace canonicalization is disabled, dump it to output as is.
     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
-      NewFile.push_back(*Ptr);
+      OutputBuffer.push_back(*Ptr);
       continue;
     }
 
     // Otherwise, add one space and advance over neighboring space.
-    NewFile.push_back(' ');
+    OutputBuffer.push_back(' ');
     while (Ptr+1 != End &&
            (Ptr[1] == ' ' || Ptr[1] == '\t'))
       ++Ptr;
   }
 
-  return std::unique_ptr<MemoryBuffer>(
-      MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
+  // Add a null byte and then return all but that byte.
+  OutputBuffer.push_back('\0');
+  return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
 }
 
 static bool IsPartOfWord(char c) {
@@ -861,26 +860,8 @@ static StringRef FindFirstMatchingPrefix
 /// ReadCheckFile - Read the check file, which specifies the sequence of
 /// expected strings.  The strings are added to the CheckStrings vector.
 /// Returns true in case of an error, false otherwise.
-static bool ReadCheckFile(SourceMgr &SM,
+static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer,
                           std::vector<CheckString> &CheckStrings) {
-  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
-      MemoryBuffer::getFileOrSTDIN(CheckFilename);
-  if (std::error_code EC = FileOrErr.getError()) {
-    errs() << "Could not open check file '" << CheckFilename
-           << "': " << EC.message() << '\n';
-    return true;
-  }
-
-  // If we want to canonicalize whitespace, strip excess whitespace from the
-  // buffer containing the CHECK lines. Remove DOS style line endings.
-  std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
-      std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
-
-  // Find all instances of CheckPrefix followed by : in the file.
-  StringRef Buffer = F->getBuffer();
-
-  SM.AddNewSourceBuffer(std::move(F), SMLoc());
-
   std::vector<Pattern> ImplicitNegativeChecks;
   for (const auto &PatternString : ImplicitCheckNot) {
     // Create a buffer with fake command line content in order to display the
@@ -1308,61 +1289,18 @@ static void DumpCommandLine(int argc, ch
   errs() << "\n";
 }
 
-int main(int argc, char **argv) {
-  sys::PrintStackTraceOnErrorSignal(argv[0]);
-  PrettyStackTraceProgram X(argc, argv);
-  cl::ParseCommandLineOptions(argc, argv);
-
-  if (!ValidateCheckPrefixes()) {
-    errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
-              "start with a letter and contain only alphanumeric characters, "
-              "hyphens and underscores\n";
-    return 2;
-  }
-
-  AddCheckPrefixIfNeeded();
-
-  SourceMgr SM;
-
-  // Read the expected strings from the check file.
-  std::vector<CheckString> CheckStrings;
-  if (ReadCheckFile(SM, CheckStrings))
-    return 2;
-
-  // Open the file to check and add it to SourceMgr.
-  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
-      MemoryBuffer::getFileOrSTDIN(InputFilename);
-  if (std::error_code EC = FileOrErr.getError()) {
-    errs() << "Could not open input file '" << InputFilename
-           << "': " << EC.message() << '\n';
-    return 2;
-  }
-  std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
-
-  if (File->getBufferSize() == 0 && !AllowEmptyInput) {
-    errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
-    DumpCommandLine(argc, argv);
-    return 2;
-  }
-
-  // Remove duplicate spaces in the input file if requested.
-  // Remove DOS style line endings.
-  std::unique_ptr<MemoryBuffer> F =
-      CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
-
-  // Check that we have all of the expected strings, in order, in the input
-  // file.
-  StringRef Buffer = F->getBuffer();
-
-  SM.AddNewSourceBuffer(std::move(F), SMLoc());
+/// Check the input to FileCheck provided in the \p Buffer against the \p
+/// CheckStrings read from the check file.
+///
+/// Returns false if the input fails to satisfy the checks.
+bool CheckInput(SourceMgr &SM, StringRef Buffer,
+                ArrayRef<CheckString> CheckStrings) {
+  bool ChecksFailed = false;
 
   /// VariableTable - This holds all the current filecheck variables.
   StringMap<StringRef> VariableTable;
 
-  bool hasError = false;
-
   unsigned i = 0, j = 0, e = CheckStrings.size();
-
   while (true) {
     StringRef CheckRegion;
     if (j == e) {
@@ -1378,10 +1316,9 @@ int main(int argc, char **argv) {
       size_t MatchLabelLen = 0;
       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
                                                  MatchLabelLen, VariableTable);
-      if (MatchLabelPos == StringRef::npos) {
-        hasError = true;
-        break;
-      }
+      if (MatchLabelPos == StringRef::npos)
+        // Immediately bail of CHECK-LABEL fails, nothing else we can do.
+        return false;
 
       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
@@ -1398,7 +1335,7 @@ int main(int argc, char **argv) {
                                        VariableTable);
 
       if (MatchPos == StringRef::npos) {
-        hasError = true;
+        ChecksFailed = true;
         i = j;
         break;
       }
@@ -1410,5 +1347,71 @@ int main(int argc, char **argv) {
       break;
   }
 
-  return hasError ? 1 : 0;
+  // Success if no checks failed.
+  return !ChecksFailed;
+}
+
+int main(int argc, char **argv) {
+  sys::PrintStackTraceOnErrorSignal(argv[0]);
+  PrettyStackTraceProgram X(argc, argv);
+  cl::ParseCommandLineOptions(argc, argv);
+
+  if (!ValidateCheckPrefixes()) {
+    errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
+              "start with a letter and contain only alphanumeric characters, "
+              "hyphens and underscores\n";
+    return 2;
+  }
+
+  AddCheckPrefixIfNeeded();
+
+  SourceMgr SM;
+
+  // Read the expected strings from the check file.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> CheckFileOrErr =
+      MemoryBuffer::getFileOrSTDIN(CheckFilename);
+  if (std::error_code EC = CheckFileOrErr.getError()) {
+    errs() << "Could not open check file '" << CheckFilename
+           << "': " << EC.message() << '\n';
+    return 2;
+  }
+  MemoryBuffer &CheckFile = *CheckFileOrErr.get();
+
+  SmallString<4096> CheckFileBuffer;
+  StringRef CheckFileText =
+      CanonicalizeFile(CheckFile, NoCanonicalizeWhiteSpace, CheckFileBuffer);
+
+  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
+                            CheckFileText, CheckFile.getBufferIdentifier()),
+                        SMLoc());
+
+  std::vector<CheckString> CheckStrings;
+  if (ReadCheckFile(SM, CheckFileText, CheckStrings))
+    return 2;
+
+
+  // Open the file to check and add it to SourceMgr.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> InputFileOrErr =
+      MemoryBuffer::getFileOrSTDIN(InputFilename);
+  if (std::error_code EC = InputFileOrErr.getError()) {
+    errs() << "Could not open input file '" << InputFilename
+           << "': " << EC.message() << '\n';
+    return 2;
+  }
+  MemoryBuffer &InputFile = *InputFileOrErr.get();
+
+  if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
+    errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
+    DumpCommandLine(argc, argv);
+    return 2;
+  }
+
+  SmallString<4096> InputFileBuffer;
+  StringRef InputFileText =
+      CanonicalizeFile(InputFile, NoCanonicalizeWhiteSpace, InputFileBuffer);
+
+  SM.AddNewSourceBuffer(
+      MemoryBuffer::getMemBuffer(InputFileText, InputFile.getBufferIdentifier()), SMLoc());
+
+  return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
 }




More information about the llvm-commits mailing list