[lld] bcea3a7 - Add test utility 'split-file'

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 3 20:43:37 PDT 2020


Author: Fangrui Song
Date: 2020-08-03T20:42:09-07:00
New Revision: bcea3a7a288e0b5ac977f90c46e4eef7946467e7

URL: https://github.com/llvm/llvm-project/commit/bcea3a7a288e0b5ac977f90c46e4eef7946467e7
DIFF: https://github.com/llvm/llvm-project/commit/bcea3a7a288e0b5ac977f90c46e4eef7946467e7.diff

LOG: Add test utility 'split-file'

See https://lists.llvm.org/pipermail/llvm-dev/2020-July/143373.html
"[llvm-dev] Multiple documents in one test file" for some discussions.

This patch has explored several alternatives. The current semantics are similar to
what @dblaikie proposed.
`split-file filename output` splits the input file into multiple parts separated by
regex `^(.|//)--- filename` and writes each part to the file `output/filename`
(`filename` can include path separators).

Use case A (organizing input of different formats (e.g. linker
script+assembly) in one file).

```
# RUN: split-file %s %t
# RUN: llvm-mc %t/asm -o %t.o
# RUN: ld.lld -T %t/lds %t.o -o %t
This is sometimes better than the %S/Inputs/ approach because the user
can see the auxiliary files immediately and doesn't have to open another file.

#--- asm
...
#--- lds
...
```

Use case B (for utilities which don't have a built-in input-splitting
feature):

```
// RUN: split-file %s %t
// RUN: llc < %t/1.ll | FileCheck %s --check-prefix=CASE1
// RUN: llc < %t/2.ll | FileCheck %s --check-prefix=CASE2
Combining tests prudently can improve readability.
For example, when testing parsing errors where error recovery isn't possible,
grouping the tests in one file makes the test coverage/strategy easier to see.

//--- 1.ll
...
//--- 2.ll
...
```

Since this is a new utility, there are no git history concerns for
UpperCase variable names. I use lowerCase variable names like mlir/lld.

Reviewed By: jhenderson, lattner

Differential Revision: https://reviews.llvm.org/D83834

Added: 
    llvm/test/tools/split-file/Inputs/basic-aa.txt
    llvm/test/tools/split-file/Inputs/basic-bb.txt
    llvm/test/tools/split-file/Inputs/basic-cc.txt
    llvm/test/tools/split-file/basic.test
    llvm/test/tools/split-file/empty.test
    llvm/test/tools/split-file/error.test
    llvm/test/tools/split-file/help.test
    llvm/test/tools/split-file/no-leading-lines.test
    llvm/test/tools/split-file/output-is-special.test
    llvm/tools/split-file/.clang-tidy
    llvm/tools/split-file/CMakeLists.txt
    llvm/tools/split-file/split-file.cpp
    llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn

Modified: 
    lld/test/CMakeLists.txt
    lld/test/ELF/linkerscript/noload.s
    llvm/docs/TestingGuide.rst
    llvm/test/CMakeLists.txt
    llvm/test/lit.cfg.py
    llvm/test/tools/gold/X86/multiple-sections.ll
    llvm/test/tools/llvm-strings/radix.test
    llvm/utils/gn/secondary/lld/test/BUILD.gn
    llvm/utils/gn/secondary/llvm/test/BUILD.gn

Removed: 
    


################################################################################
diff  --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt
index e7d113330739..52e6118ba876 100644
--- a/lld/test/CMakeLists.txt
+++ b/lld/test/CMakeLists.txt
@@ -28,7 +28,7 @@ if (NOT LLD_BUILT_STANDALONE)
     FileCheck count llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
     llvm-dis llvm-dwarfdump llvm-lib llvm-lipo llvm-mc llvm-nm llvm-objcopy
     llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj llvm-strip not obj2yaml
-    opt yaml2obj
+    opt split-file yaml2obj
     )
 endif()
 

diff  --git a/lld/test/ELF/linkerscript/noload.s b/lld/test/ELF/linkerscript/noload.s
index 2f52b465854e..20b07b2b185a 100644
--- a/lld/test/ELF/linkerscript/noload.s
+++ b/lld/test/ELF/linkerscript/noload.s
@@ -1,12 +1,8 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
-# RUN: echo "SECTIONS { \
-# RUN:        .data_noload_a (NOLOAD) : { *(.data_noload_a) } \
-# RUN:        .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) } \
-# RUN:        .no_input_sec_noload (NOLOAD) : { . += 1; } \
-# RUN:        .text (0x20000) : { *(.text) } };" > %t.script
-# RUN: ld.lld -o %t --script %t.script %t.o
-# RUN: llvm-readelf -S -l %t | FileCheck %s
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/asm -o %t.o
+# RUN: ld.lld --script %t/lds %t.o -o %t/out
+# RUN: llvm-readelf -S -l %t/out | FileCheck %s
 
 # CHECK:      Name                 Type   Address          Off               Size
 # CHECK:      .data_noload_a       NOBITS 0000000000000000 [[OFF:[0-9a-f]+]] 001000
@@ -16,6 +12,7 @@
 # CHECK:      Type Offset   VirtAddr           PhysAddr
 # CHECK-NEXT: LOAD 0x001000 0x0000000000020000 0x0000000000020000
 
+#--- asm
 .section .text,"ax",@progbits
   nop
 
@@ -24,3 +21,11 @@
 
 .section .data_noload_b,"aw",@progbits
 .zero 4096
+
+#--- lds
+SECTIONS {
+  .data_noload_a (NOLOAD) : { *(.data_noload_a) }
+  .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) }
+  .no_input_sec_noload (NOLOAD) : { . += 1; }
+  .text (0x20000) : { *(.text) }
+}

diff  --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 2e937f000627..4ca1a359b64c 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -271,8 +271,27 @@ adding your code there instead of creating a new file.
 Extra files
 -----------
 
-If your test requires extra files besides the file containing the ``RUN:``
-lines, the idiomatic place to put them is in a subdirectory ``Inputs``.
+If your test requires extra files besides the file containing the ``RUN:`` lines
+and the extra files are small, consider specifying them in the same file and
+using ``split-file`` to extract them. For example,
+
+.. code-block:: llvm
+
+  ; RUN: split-file %s %t
+  ; RUN: llvm-link -S %t/a.ll %t/b.ll | FileCheck %s
+
+  ; CHECK: ...
+
+  ;--- a.ll
+  ...
+  ;--- b.ll
+  ...
+
+The parts are separated by the regex ``^(.|//)--- <part>``. By default the
+extracted content has leading empty lines to preserve line numbers. Specify
+``--no-leading-lines`` to drop leading lines.
+
+If the extra files are large, the idiomatic place to put them is in a subdirectory ``Inputs``.
 You can then refer to the extra files as ``%S/Inputs/foo.bar``.
 
 For example, consider ``test/Linker/ident.ll``. The directory structure is

diff  --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 91215b3ca0ef..cde80035a09b 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -119,6 +119,7 @@ set(LLVM_TEST_DEPENDS
           opt
           sancov
           sanstats
+          split-file
           verify-uselistorder
           yaml-bench
           yaml2obj

diff  --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 0a3289fcc4ad..4502ac58c45a 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -141,6 +141,7 @@ def get_asan_rtlib():
     ToolSubst('%llvm-objcopy', FindTool('llvm-objcopy')),
     ToolSubst('%llvm-strip', FindTool('llvm-strip')),
     ToolSubst('%llvm-install-name-tool', FindTool('llvm-install-name-tool')),
+    ToolSubst('%split-file', FindTool('split-file')),
 ]
 
 # FIXME: Why do we have both `lli` and `%lli` that do slightly different things?

diff  --git a/llvm/test/tools/gold/X86/multiple-sections.ll b/llvm/test/tools/gold/X86/multiple-sections.ll
index facbd8d992ed..575fb81fcd6f 100644
--- a/llvm/test/tools/gold/X86/multiple-sections.ll
+++ b/llvm/test/tools/gold/X86/multiple-sections.ll
@@ -1,10 +1,8 @@
-; RUN: echo ".text.tin" > %t_order_lto.txt
-; RUN: echo ".text._start" >> %t_order_lto.txt
-; RUN: echo ".text.pat" >> %t_order_lto.txt
-; RUN: llvm-as %s -o %t.o
+; RUN: split-file %s %t
+; RUN: llvm-as %t/a.ll -o %t.o
 ; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \
 ; RUN:     -m elf_x86_64 -o %t.exe %t.o \
-; RUN:     --section-ordering-file=%t_order_lto.txt
+; RUN:     --section-ordering-file=%t/order
 ; RUN: llvm-readelf -s %t.exe | FileCheck %s
 
 ; Check that the order of the sections is tin -> _start -> pat.
@@ -13,6 +11,12 @@
 ; CHECK:      00000000004000b0     1 FUNC    LOCAL  DEFAULT    1 tin
 ; CHECK:      00000000004000c0    15 FUNC    GLOBAL DEFAULT    1 _start
 
+;--- order
+.text.tin
+.text._start
+.text.pat
+
+;--- a.ll
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 

diff  --git a/llvm/test/tools/llvm-strings/radix.test b/llvm/test/tools/llvm-strings/radix.test
index d23fb3cddc8f..4dafbd1c84fc 100644
--- a/llvm/test/tools/llvm-strings/radix.test
+++ b/llvm/test/tools/llvm-strings/radix.test
@@ -1,29 +1,32 @@
 ## Show that llvm-strings can handle the -t/--radix switch properly.
 
-RUN: echo one > %t
-RUN: echo two >> %t
-RUN: echo three >> %t
-RUN: echo four >> %t
-RUN: echo five >> %t
-RUN: echo six >> %t
-RUN: echo seven >> %t
-RUN: echo eight >> %t
-RUN: echo nine >> %t
-RUN: echo ten >> %t
-
-RUN: llvm-strings %t | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}}
-RUN: llvm-strings -t d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}}
-RUN: llvm-strings -t o %t | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace --implicit-check-not={{.}}
-RUN: llvm-strings -t x %t | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace --implicit-check-not={{.}}
+RUN: split-file --no-leading-lines %s %t
+#--- a.txt
+one
+two
+three
+four
+five
+six
+seven
+eight
+nine
+ten
+#--- end
+
+RUN: llvm-strings %t/a.txt | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}}
+RUN: llvm-strings -t d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}}
+RUN: llvm-strings -t o %t/a.txt | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace --implicit-check-not={{.}}
+RUN: llvm-strings -t x %t/a.txt | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace --implicit-check-not={{.}}
 
 ## Show --radix works too.
-RUN: llvm-strings --radix d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
-RUN: llvm-strings --radix o %t | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace
-RUN: llvm-strings --radix x %t | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace
+RUN: llvm-strings --radix d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
+RUN: llvm-strings --radix o %t/a.txt | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace
+RUN: llvm-strings --radix x %t/a.txt | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace
 
 ## Show different syntaxes work.
-RUN: llvm-strings --radix=d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
-RUN: llvm-strings -t=d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
+RUN: llvm-strings --radix=d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
+RUN: llvm-strings -t=d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
 
 CHECK-NONE: {{^}}three
 CHECK-NONE: {{^}}four
@@ -54,5 +57,5 @@ CHECK-HEX: {{^}}     22 eight
 CHECK-HEX: {{^}}     28 nine
 
 ## Show that an invalid value is rejected.
-RUN: not llvm-strings --radix z %t 2>&1 | FileCheck %s --check-prefix=INVALID
+RUN: not llvm-strings --radix z %t/a.txt 2>&1 | FileCheck %s --check-prefix=INVALID
 INVALID: llvm-strings{{.*}}: for the --radix option: Cannot find option named 'z'!

diff  --git a/llvm/test/tools/split-file/Inputs/basic-aa.txt b/llvm/test/tools/split-file/Inputs/basic-aa.txt
new file mode 100644
index 000000000000..0b9ddeb2fc12
--- /dev/null
+++ b/llvm/test/tools/split-file/Inputs/basic-aa.txt
@@ -0,0 +1,2 @@
+
+aa

diff  --git a/llvm/test/tools/split-file/Inputs/basic-bb.txt b/llvm/test/tools/split-file/Inputs/basic-bb.txt
new file mode 100644
index 000000000000..0f20b8cf755b
--- /dev/null
+++ b/llvm/test/tools/split-file/Inputs/basic-bb.txt
@@ -0,0 +1,6 @@
+
+
+
+; Comments are preserved.
+bb
+

diff  --git a/llvm/test/tools/split-file/Inputs/basic-cc.txt b/llvm/test/tools/split-file/Inputs/basic-cc.txt
new file mode 100644
index 000000000000..dc815bf4b7dc
--- /dev/null
+++ b/llvm/test/tools/split-file/Inputs/basic-cc.txt
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+cc

diff  --git a/llvm/test/tools/split-file/basic.test b/llvm/test/tools/split-file/basic.test
new file mode 100644
index 000000000000..5d32c3429ed3
--- /dev/null
+++ b/llvm/test/tools/split-file/basic.test
@@ -0,0 +1,40 @@
+#--- aa
+aa
+;--- bb
+; Comments are preserved.
+bb
+
+//--- subdir/cc
+cc
+//--- end
+
+# RUN: rm -rf %t
+# RUN: split-file %s %t
+# RUN: diff %S/Inputs/basic-aa.txt %t/aa
+# RUN: diff %S/Inputs/basic-bb.txt %t/bb
+# RUN: diff %S/Inputs/basic-cc.txt %t/subdir/cc
+# RUN: FileCheck %s --check-prefix=END < %t/end
+
+## Can be called on a non-empty directory.
+# RUN: split-file %s %t
+# RUN: diff %S/Inputs/basic-aa.txt %t/aa
+
+## Test that we will delete the output if it is a file, so that we can create
+## a directory.
+# RUN: rm -rf %t && touch %t
+# RUN: split-file %s %t
+# RUN: diff %S/Inputs/basic-aa.txt %t/aa
+
+# END: RUN: split-file %s %t
+
+# RUN: not %split-file 2>&1 | FileCheck %s --check-prefix=NO_INPUT
+
+# NO_INPUT: split-file: error: input filename is not specified
+
+# RUN: not %split-file %s '' 2>&1 | FileCheck %s --check-prefix=NO_OUTPUT
+
+# NO_OUTPUT: split-file: error: output directory is not specified
+
+# RUN: not %split-file %S/Inputs/basic-aa.txt %t 2>&1 | FileCheck %s --check-prefix=NOT_EXIST
+
+# NOT_EXIST: split-file: error: {{.*}}.txt: no part separator was found

diff  --git a/llvm/test/tools/split-file/empty.test b/llvm/test/tools/split-file/empty.test
new file mode 100644
index 000000000000..e76bea93bc2e
--- /dev/null
+++ b/llvm/test/tools/split-file/empty.test
@@ -0,0 +1,4 @@
+# RUN: split-file --no-leading-lines %s %t
+# RUN: count 0 < %t/empty
+
+#--- empty

diff  --git a/llvm/test/tools/split-file/error.test b/llvm/test/tools/split-file/error.test
new file mode 100644
index 000000000000..9efa5adca49a
--- /dev/null
+++ b/llvm/test/tools/split-file/error.test
@@ -0,0 +1,16 @@
+# RUN: not %split-file %s %t 2>&1 | FileCheck %s
+# RUN: not ls %t/dup
+
+# CHECK: {{.*}}.test:[[#@LINE+1]]: error: empty part name
+//--- 
+
+# CHECK: {{.*}}.test:[[#@LINE+1]]: error: part name cannot have leading or trailing space
+//---  leading_space
+
+# CHECK: {{.*}}.test:[[#@LINE+1]]: error: part name cannot have leading or trailing space
+//--- trailing_space 
+
+;--- dup
+
+# CHECK: {{.*}}.test:[[#@LINE+1]]: error: ';--- dup' occurs more than once
+;--- dup

diff  --git a/llvm/test/tools/split-file/help.test b/llvm/test/tools/split-file/help.test
new file mode 100644
index 000000000000..27c450aeac3a
--- /dev/null
+++ b/llvm/test/tools/split-file/help.test
@@ -0,0 +1,6 @@
+RUN: split-file --help 2>&1 | FileCheck --implicit-check-not='General Options:' %s
+CHECK: OVERVIEW: Split input {{.*}}
+CHECK: USAGE: split-file [options] filename directory
+CHECK: Generic Options:
+CHECK: split-file Options:
+CHECK:   --no-leading-lines

diff  --git a/llvm/test/tools/split-file/no-leading-lines.test b/llvm/test/tools/split-file/no-leading-lines.test
new file mode 100644
index 000000000000..d4de34f33d1e
--- /dev/null
+++ b/llvm/test/tools/split-file/no-leading-lines.test
@@ -0,0 +1,10 @@
+## With --no-leading-lines, don't add leading lines (which are used to preserve line numbers).
+
+# RUN: split-file --no-leading-lines %s %t
+# RUN: count 1 < %t/a.txt
+# RUN: FileCheck %s < %t/a.txt
+
+# CHECK: input
+
+#--- a.txt
+input

diff  --git a/llvm/test/tools/split-file/output-is-special.test b/llvm/test/tools/split-file/output-is-special.test
new file mode 100644
index 000000000000..98bb4d36a4ff
--- /dev/null
+++ b/llvm/test/tools/split-file/output-is-special.test
@@ -0,0 +1,8 @@
+# UNSUPPORTED: system-windows
+# REQUIRES: shell
+
+## Don't delete the output if it is special, otherwise root may accidentally
+## remove important special files.
+# RUN: not split-file %s /dev/null 2>&1 | FileCheck %s
+
+# CHECK: error: /dev/null: output cannot be a special file

diff  --git a/llvm/tools/split-file/.clang-tidy b/llvm/tools/split-file/.clang-tidy
new file mode 100644
index 000000000000..87ec2ff53af6
--- /dev/null
+++ b/llvm/tools/split-file/.clang-tidy
@@ -0,0 +1,19 @@
+# Almost identical to the top-level .clang-tidy, except that {Member,Parameter,Variable}Case use camelBack.
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming'
+CheckOptions:
+  - key:             readability-identifier-naming.ClassCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.EnumCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.FunctionCase
+    value:           camelBack
+  - key:             readability-identifier-naming.MemberCase
+    value:           camelBack
+  - key:             readability-identifier-naming.ParameterCase
+    value:           camelBack
+  - key:             readability-identifier-naming.UnionCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.VariableCase
+    value:           camelBack
+  - key:             readability-identifier-naming.IgnoreMainLikeFunctions
+    value:           1

diff  --git a/llvm/tools/split-file/CMakeLists.txt b/llvm/tools/split-file/CMakeLists.txt
new file mode 100644
index 000000000000..ba998483c22a
--- /dev/null
+++ b/llvm/tools/split-file/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_llvm_tool(split-file
+  split-file.cpp
+  )

diff  --git a/llvm/tools/split-file/split-file.cpp b/llvm/tools/split-file/split-file.cpp
new file mode 100644
index 000000000000..772a19164dc4
--- /dev/null
+++ b/llvm/tools/split-file/split-file.cpp
@@ -0,0 +1,172 @@
+//===- split-file.cpp - Input splitting utility ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
+// the specified part.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
+#include <string>
+#include <system_error>
+
+using namespace llvm;
+
+static cl::OptionCategory cat("split-file Options");
+
+static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
+                                  cl::cat(cat));
+
+static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
+                                   cl::value_desc("directory"), cl::cat(cat));
+
+static cl::opt<bool> noLeadingLines("no-leading-lines",
+                                    cl::desc("Don't preserve line numbers"),
+                                    cl::cat(cat));
+
+static StringRef toolName;
+static int errorCount;
+
+LLVM_ATTRIBUTE_NORETURN static void fatal(StringRef filename,
+                                          const Twine &message) {
+  if (filename.empty())
+    WithColor::error(errs(), toolName) << message << '\n';
+  else
+    WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
+  exit(1);
+}
+
+static void error(StringRef filename, int64_t line, const Twine &message) {
+  ++errorCount;
+  errs() << filename << ':' << line << ": ";
+  WithColor::error(errs()) << message << '\n';
+}
+
+namespace {
+struct Part {
+  const char *begin = nullptr;
+  const char *end = nullptr;
+  int64_t leadingLines = 0;
+};
+} // namespace
+
+static int handle(MemoryBuffer &inputBuf, StringRef input) {
+  DenseMap<StringRef, Part> partToBegin;
+  StringRef lastPart, separator;
+  for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
+    const int64_t lineNo = i.line_number();
+    const StringRef line = *i++;
+    const size_t markerLen = line.startswith("//") ? 6 : 5;
+    if (!(line.size() >= markerLen &&
+          line.substr(markerLen - 4).startswith("--- ")))
+      continue;
+    separator = line.substr(0, markerLen);
+    const StringRef partName = line.substr(markerLen);
+    if (partName.empty()) {
+      error(input, lineNo, "empty part name");
+      continue;
+    }
+    if (isSpace(partName.front()) || isSpace(partName.back())) {
+      error(input, lineNo, "part name cannot have leading or trailing space");
+      continue;
+    }
+
+    auto res = partToBegin.try_emplace(partName);
+    if (!res.second) {
+      error(input, lineNo,
+            "'" + separator + partName + "' occurs more than once");
+      continue;
+    }
+    if (!lastPart.empty())
+      partToBegin[lastPart].end = line.data();
+    Part &cur = res.first->second;
+    if (!i.is_at_eof())
+      cur.begin = i->data();
+    // If --no-leading-lines is specified, leadingLines is 0. Otherwise, prepend
+    // newlines so that the extracted part preserves line numbers.
+    cur.leadingLines = noLeadingLines ? 0 : i.line_number() - 1;
+
+    lastPart = partName;
+  }
+  if (lastPart.empty())
+    fatal(input, "no part separator was found");
+  if (errorCount)
+    return 1;
+  partToBegin[lastPart].end = inputBuf.getBufferEnd();
+
+  std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
+  SmallString<256> partPath;
+  for (auto &keyValue : partToBegin) {
+    partPath.clear();
+    sys::path::append(partPath, output, keyValue.first);
+    std::error_code ec =
+        sys::fs::create_directories(sys::path::parent_path(partPath));
+    if (ec)
+      fatal(input, ec.message());
+    auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
+                                              llvm::sys::fs::OF_None);
+    if (!f)
+      fatal(input, ec.message());
+
+    Part &part = keyValue.second;
+    for (int64_t i = 0; i != part.leadingLines; ++i)
+      (*f).os().write('\n');
+    if (part.begin)
+      (*f).os().write(part.begin, part.end - part.begin);
+    outputFiles.push_back(std::move(f));
+  }
+
+  for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
+    outputFile->keep();
+  return 0;
+}
+
+int main(int argc, const char **argv) {
+  toolName = sys::path::stem(argv[0]);
+  cl::HideUnrelatedOptions({&cat});
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "Split input into multiple parts separated by regex '^(.|//)--- ' and "
+      "extract the part specified by '^(.|//)--- <part>'\n",
+      nullptr,
+      /*EnvVar=*/nullptr,
+      /*LongOptionsUseDoubleDash=*/true);
+
+  if (input.empty())
+    fatal("", "input filename is not specified");
+  if (output.empty())
+    fatal("", "output directory is not specified");
+  ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
+      MemoryBuffer::getFileOrSTDIN(input);
+  if (std::error_code ec = bufferOrErr.getError())
+    fatal(input, ec.message());
+
+  // Delete output if it is a file or an empty directory, so that we can create
+  // a directory.
+  sys::fs::file_status status;
+  if (std::error_code ec = sys::fs::status(output, status))
+    if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
+      fatal(output, ec.message());
+  if (status.type() != sys::fs::file_type::file_not_found &&
+      status.type() != sys::fs::file_type::directory_file &&
+      status.type() != sys::fs::file_type::regular_file)
+    fatal(output, "output cannot be a special file");
+  if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
+    if (ec.value() != static_cast<int>(std::errc::directory_not_empty))
+      fatal(output, ec.message());
+  return handle(**bufferOrErr, input);
+}

diff  --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn
index 581cc5482578..bfb63a39ba65 100644
--- a/llvm/utils/gn/secondary/lld/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn
@@ -94,6 +94,7 @@ group("test") {
     "//llvm/tools/llvm-readobj:symlinks",
     "//llvm/tools/obj2yaml",
     "//llvm/tools/opt",
+    "//llvm/tools/split-file",
     "//llvm/tools/yaml2obj",
     "//llvm/utils/FileCheck",
     "//llvm/utils/count",

diff  --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
index 2c4a23ffbaac..c714d9b5ba7b 100644
--- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
@@ -260,6 +260,7 @@ group("test") {
     "//llvm/tools/opt",
     "//llvm/tools/sancov",
     "//llvm/tools/sanstats",
+    "//llvm/tools/split-file",
     "//llvm/tools/verify-uselistorder",
     "//llvm/tools/yaml2obj",
     "//llvm/unittests",

diff  --git a/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn
new file mode 100644
index 000000000000..4bf9269c3c38
--- /dev/null
+++ b/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn
@@ -0,0 +1,4 @@
+executable("split-file") {
+  deps = [ "//llvm/lib/Support" ]
+  sources = [ "split-file.cpp" ]
+}


        


More information about the llvm-commits mailing list