[llvm] r271949 - Retry "[llvm-profdata] Add option to ingest filepaths from a file"

Vedant Kumar via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 6 15:39:23 PDT 2016


Author: vedantk
Date: Mon Jun  6 17:39:22 2016
New Revision: 271949

URL: http://llvm.org/viewvc/llvm-project?rev=271949&view=rev
Log:
Retry "[llvm-profdata] Add option to ingest filepaths from a file"

Changes since the initial commit:
- Normalize file paths read from the file to prevent Windows path
  separators from escaping parts of the path.
- Since we need to store the normalized file paths in WeightedFile,
  don't do tricky things to keep the source MemoryBuffer alive.

Differential Revision: http://reviews.llvm.org/D20980

Added:
    llvm/trunk/test/tools/llvm-profdata/input-filenames.test
Modified:
    llvm/trunk/docs/CommandGuide/llvm-profdata.rst
    llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp

Modified: llvm/trunk/docs/CommandGuide/llvm-profdata.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/CommandGuide/llvm-profdata.rst?rev=271949&r1=271948&r2=271949&view=diff
==============================================================================
--- llvm/trunk/docs/CommandGuide/llvm-profdata.rst (original)
+++ llvm/trunk/docs/CommandGuide/llvm-profdata.rst Mon Jun  6 17:39:22 2016
@@ -44,6 +44,9 @@ interpreted as relatively more important
 nature of the training runs it may be useful to adjust the weight given to each
 input file by using the ``-weighted-input`` option.
 
+Profiles passed in via ``-weighted-input``, ``-input-files``, or via positional
+arguments are processed once for each time they are seen.
+
 
 OPTIONS
 ^^^^^^^
@@ -65,6 +68,12 @@ OPTIONS
  Input files specified without using this option are assigned a default
  weight of 1. Examples are shown below.
 
+.. option:: -input-files=path, -f=path
+
+  Specify a file which contains a list of files to merge. The entries in this
+  file are newline-separated. Lines starting with '#' are skipped. Entries may
+  be of the form <filename> or <weight>,<filename>.
+
 .. option:: -instr (default)
 
  Specify that the input profile is an instrumentation-based profile.

Added: llvm/trunk/test/tools/llvm-profdata/input-filenames.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-profdata/input-filenames.test?rev=271949&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-profdata/input-filenames.test (added)
+++ llvm/trunk/test/tools/llvm-profdata/input-filenames.test Mon Jun  6 17:39:22 2016
@@ -0,0 +1,16 @@
+RUN: printf '# comment 1\n' > %t
+RUN: printf ' # comment 2\n' >> %t
+
+RUN: printf 'bar\n' >> %t
+RUN: printf ' baz\n' >> %t
+
+RUN: printf '2,%t.weighted\n' >> %t
+RUN: printf ' ' > %t.weighted
+
+RUN: llvm-profdata merge -input-files %t -dump-input-file-list foo -o /dev/null | FileCheck %s
+RUN: llvm-profdata merge -f %t -dump-input-file-list foo -o /dev/null | FileCheck %s
+
+CHECK: 1,foo
+CHECK-NEXT: 1,bar
+CHECK-NEXT: 1,baz
+CHECK-NEXT: 2,{{.*}}.weighted

Modified: llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp?rev=271949&r1=271948&r2=271949&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp (original)
+++ llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp Mon Jun  6 17:39:22 2016
@@ -108,12 +108,12 @@ static void handleMergeWriterError(Error
 }
 
 struct WeightedFile {
-  StringRef Filename;
+  std::string Filename;
   uint64_t Weight;
 
   WeightedFile() {}
 
-  WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {}
+  WeightedFile(std::string F, uint64_t W) : Filename{F}, Weight{W} {}
 };
 typedef SmallVector<WeightedFile, 5> WeightedFileVector;
 
@@ -209,18 +209,47 @@ static void mergeSampleProfile(const Wei
 }
 
 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
-  StringRef WeightStr, FileName;
-  std::tie(WeightStr, FileName) = WeightedFilename.split(',');
+  StringRef WeightStr, FilenameStr;
+  std::tie(WeightStr, FilenameStr) = WeightedFilename.split(',');
 
   uint64_t Weight;
   if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
     exitWithError("Input weight must be a positive integer.");
 
-  if (!sys::fs::exists(FileName))
+  SmallString<256> CanonicalFilename;
+  sys::path::native(FilenameStr, CanonicalFilename);
+
+  if (!sys::fs::exists(CanonicalFilename))
     exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
-                      FileName);
+                      CanonicalFilename);
+
+  return WeightedFile(StringRef(CanonicalFilename).str(), Weight);
+}
 
-  return WeightedFile(FileName, Weight);
+static void parseInputFilenamesFile(const StringRef &InputFilenamesFile,
+                                    WeightedFileVector &WFV) {
+  if (InputFilenamesFile == "")
+    return;
+
+  auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile);
+  if (!BufOrError)
+    exitWithErrorCode(BufOrError.getError(), InputFilenamesFile);
+
+  auto Buffer = std::move(*BufOrError);
+  StringRef Data = Buffer->getBuffer();
+  SmallVector<StringRef, 8> Entries;
+  Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+  for (const StringRef &FileWeightEntry : Entries) {
+    StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
+    // Skip comments.
+    if (SanitizedEntry.startswith("#"))
+      continue;
+    // If there's no comma, it's an unweighted profile.
+    else if (SanitizedEntry.rfind(',') == StringRef::npos)
+      WFV.emplace_back(SanitizedEntry, 1);
+    else
+      WFV.emplace_back(parseWeightedFile(SanitizedEntry));
+  }
 }
 
 static int merge_main(int argc, const char *argv[]) {
@@ -228,6 +257,15 @@ static int merge_main(int argc, const ch
                                        cl::desc("<filename...>"));
   cl::list<std::string> WeightedInputFilenames("weighted-input",
                                                cl::desc("<weight>,<filename>"));
+  cl::opt<std::string> InputFilenamesFile(
+      "input-files", cl::init(""),
+      cl::desc("Path to file containing newline-separated "
+               "[<weight>,]<filename> entries"));
+  cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
+                                cl::aliasopt(InputFilenamesFile));
+  cl::opt<bool> DumpInputFileList(
+      "dump-input-file-list", cl::init(false), cl::Hidden,
+      cl::desc("Dump the list of input files and their weights, then exit"));
   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                       cl::init("-"), cl::Required,
                                       cl::desc("Output file"));
@@ -249,15 +287,22 @@ static int merge_main(int argc, const ch
 
   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
 
-  if (InputFilenames.empty() && WeightedInputFilenames.empty())
-    exitWithError("No input files specified. See " +
-                  sys::path::filename(argv[0]) + " -help");
-
   WeightedFileVector WeightedInputs;
   for (StringRef Filename : InputFilenames)
-    WeightedInputs.push_back(WeightedFile(Filename, 1));
+    WeightedInputs.emplace_back(Filename, 1);
   for (StringRef WeightedFilename : WeightedInputFilenames)
-    WeightedInputs.push_back(parseWeightedFile(WeightedFilename));
+    WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename));
+  parseInputFilenamesFile(InputFilenamesFile, WeightedInputs);
+
+  if (WeightedInputs.empty())
+    exitWithError("No input files specified. See " +
+                  sys::path::filename(argv[0]) + " -help");
+
+  if (DumpInputFileList) {
+    for (auto &WF : WeightedInputs)
+      outs() << WF.Weight << "," << WF.Filename << "\n";
+    return 0;
+  }
 
   if (ProfileKind == instr)
     mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,




More information about the llvm-commits mailing list