[lld] bcea3a7 - Add test utility 'split-file'
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 3 20:43:37 PDT 2020
Author: Fangrui Song
Date: 2020-08-03T20:42:09-07:00
New Revision: bcea3a7a288e0b5ac977f90c46e4eef7946467e7
URL: https://github.com/llvm/llvm-project/commit/bcea3a7a288e0b5ac977f90c46e4eef7946467e7
DIFF: https://github.com/llvm/llvm-project/commit/bcea3a7a288e0b5ac977f90c46e4eef7946467e7.diff
LOG: Add test utility 'split-file'
See https://lists.llvm.org/pipermail/llvm-dev/2020-July/143373.html
"[llvm-dev] Multiple documents in one test file" for some discussions.
This patch has explored several alternatives. The current semantics are similar to
what @dblaikie proposed.
`split-file filename output` splits the input file into multiple parts separated by
regex `^(.|//)--- filename` and writes each part to the file `output/filename`
(`filename` can include path separators).
Use case A (organizing input of different formats (e.g. linker
script+assembly) in one file).
```
# RUN: split-file %s %t
# RUN: llvm-mc %t/asm -o %t.o
# RUN: ld.lld -T %t/lds %t.o -o %t
This is sometimes better than the %S/Inputs/ approach because the user
can see the auxiliary files immediately and doesn't have to open another file.
# asm
...
# lds
...
```
Use case B (for utilities which don't have built-in input splitting
feature):
```
// RUN: split-file %s %t
// RUN: llc < %t/1.ll | FileCheck %s --check-prefix=CASE1
// RUN: llc < %t/2.ll | FileCheck %s --check-prefix=CASE2
Combining tests prudently can improve readability.
For example, when testing parsing errors if the recovery mechanism isn't possible,
grouping the tests in one file makes the test coverage/strategy easier to see.
//--- 1.ll
...
//--- 2.ll
...
```
Since this is a new utility, there are no git history concerns for
UpperCase variable names. I use lowerCase variable names like mlir/lld.
Reviewed By: jhenderson, lattner
Differential Revision: https://reviews.llvm.org/D83834
Added:
llvm/test/tools/split-file/Inputs/basic-aa.txt
llvm/test/tools/split-file/Inputs/basic-bb.txt
llvm/test/tools/split-file/Inputs/basic-cc.txt
llvm/test/tools/split-file/basic.test
llvm/test/tools/split-file/empty.test
llvm/test/tools/split-file/error.test
llvm/test/tools/split-file/help.test
llvm/test/tools/split-file/no-leading-lines.test
llvm/test/tools/split-file/output-is-special.test
llvm/tools/split-file/.clang-tidy
llvm/tools/split-file/CMakeLists.txt
llvm/tools/split-file/split-file.cpp
llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn
Modified:
lld/test/CMakeLists.txt
lld/test/ELF/linkerscript/noload.s
llvm/docs/TestingGuide.rst
llvm/test/CMakeLists.txt
llvm/test/lit.cfg.py
llvm/test/tools/gold/X86/multiple-sections.ll
llvm/test/tools/llvm-strings/radix.test
llvm/utils/gn/secondary/lld/test/BUILD.gn
llvm/utils/gn/secondary/llvm/test/BUILD.gn
Removed:
################################################################################
diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt
index e7d113330739..52e6118ba876 100644
--- a/lld/test/CMakeLists.txt
+++ b/lld/test/CMakeLists.txt
@@ -28,7 +28,7 @@ if (NOT LLD_BUILT_STANDALONE)
FileCheck count llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
llvm-dis llvm-dwarfdump llvm-lib llvm-lipo llvm-mc llvm-nm llvm-objcopy
llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj llvm-strip not obj2yaml
- opt yaml2obj
+ opt split-file yaml2obj
)
endif()
diff --git a/lld/test/ELF/linkerscript/noload.s b/lld/test/ELF/linkerscript/noload.s
index 2f52b465854e..20b07b2b185a 100644
--- a/lld/test/ELF/linkerscript/noload.s
+++ b/lld/test/ELF/linkerscript/noload.s
@@ -1,12 +1,8 @@
# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
-# RUN: echo "SECTIONS { \
-# RUN: .data_noload_a (NOLOAD) : { *(.data_noload_a) } \
-# RUN: .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) } \
-# RUN: .no_input_sec_noload (NOLOAD) : { . += 1; } \
-# RUN: .text (0x20000) : { *(.text) } };" > %t.script
-# RUN: ld.lld -o %t --script %t.script %t.o
-# RUN: llvm-readelf -S -l %t | FileCheck %s
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/asm -o %t.o
+# RUN: ld.lld --script %t/lds %t.o -o %t/out
+# RUN: llvm-readelf -S -l %t/out | FileCheck %s
# CHECK: Name Type Address Off Size
# CHECK: .data_noload_a NOBITS 0000000000000000 [[OFF:[0-9a-f]+]] 001000
@@ -16,6 +12,7 @@
# CHECK: Type Offset VirtAddr PhysAddr
# CHECK-NEXT: LOAD 0x001000 0x0000000000020000 0x0000000000020000
+#--- asm
.section .text,"ax",@progbits
nop
@@ -24,3 +21,11 @@
.section .data_noload_b,"aw",@progbits
.zero 4096
+
+#--- lds
+SECTIONS {
+ .data_noload_a (NOLOAD) : { *(.data_noload_a) }
+ .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) }
+ .no_input_sec_noload (NOLOAD) : { . += 1; }
+ .text (0x20000) : { *(.text) }
+}
diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 2e937f000627..4ca1a359b64c 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -271,8 +271,27 @@ adding your code there instead of creating a new file.
Extra files
-----------
-If your test requires extra files besides the file containing the ``RUN:``
-lines, the idiomatic place to put them is in a subdirectory ``Inputs``.
+If your test requires extra files besides the file containing the ``RUN:`` lines
+and the extra files are small, consider specifying them in the same file and
+using ``split-file`` to extract them. For example,
+
+.. code-block:: llvm
+
+ ; RUN: split-file %s %t
+ ; RUN: llvm-link -S %t/a.ll %t/b.ll | FileCheck %s
+
+ ; CHECK: ...
+
+ ;--- a.ll
+ ...
+ ;--- b.ll
+ ...
+
+The parts are separated by the regex ``^(.|//)--- <part>``. By default the
+extracted content has leading empty lines to preserve line numbers. Specify
+``--no-leading-lines`` to drop leading lines.
+
+If the extra files are large, the idiomatic place to put them is in a subdirectory ``Inputs``.
You can then refer to the extra files as ``%S/Inputs/foo.bar``.
For example, consider ``test/Linker/ident.ll``. The directory structure is
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 91215b3ca0ef..cde80035a09b 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -119,6 +119,7 @@ set(LLVM_TEST_DEPENDS
opt
sancov
sanstats
+ split-file
verify-uselistorder
yaml-bench
yaml2obj
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 0a3289fcc4ad..4502ac58c45a 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -141,6 +141,7 @@ def get_asan_rtlib():
ToolSubst('%llvm-objcopy', FindTool('llvm-objcopy')),
ToolSubst('%llvm-strip', FindTool('llvm-strip')),
ToolSubst('%llvm-install-name-tool', FindTool('llvm-install-name-tool')),
+ ToolSubst('%split-file', FindTool('split-file')),
]
# FIXME: Why do we have both `lli` and `%lli` that do slightly different things?
diff --git a/llvm/test/tools/gold/X86/multiple-sections.ll b/llvm/test/tools/gold/X86/multiple-sections.ll
index facbd8d992ed..575fb81fcd6f 100644
--- a/llvm/test/tools/gold/X86/multiple-sections.ll
+++ b/llvm/test/tools/gold/X86/multiple-sections.ll
@@ -1,10 +1,8 @@
-; RUN: echo ".text.tin" > %t_order_lto.txt
-; RUN: echo ".text._start" >> %t_order_lto.txt
-; RUN: echo ".text.pat" >> %t_order_lto.txt
-; RUN: llvm-as %s -o %t.o
+; RUN: split-file %s %t
+; RUN: llvm-as %t/a.ll -o %t.o
; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: -m elf_x86_64 -o %t.exe %t.o \
-; RUN: --section-ordering-file=%t_order_lto.txt
+; RUN: --section-ordering-file=%t/order
; RUN: llvm-readelf -s %t.exe | FileCheck %s
; Check that the order of the sections is tin -> _start -> pat.
@@ -13,6 +11,12 @@
; CHECK: 00000000004000b0 1 FUNC LOCAL DEFAULT 1 tin
; CHECK: 00000000004000c0 15 FUNC GLOBAL DEFAULT 1 _start
+;--- order
+.text.tin
+.text._start
+.text.pat
+
+;--- a.ll
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/tools/llvm-strings/radix.test b/llvm/test/tools/llvm-strings/radix.test
index d23fb3cddc8f..4dafbd1c84fc 100644
--- a/llvm/test/tools/llvm-strings/radix.test
+++ b/llvm/test/tools/llvm-strings/radix.test
@@ -1,29 +1,32 @@
## Show that llvm-strings can handle the -t/--radix switch properly.
-RUN: echo one > %t
-RUN: echo two >> %t
-RUN: echo three >> %t
-RUN: echo four >> %t
-RUN: echo five >> %t
-RUN: echo six >> %t
-RUN: echo seven >> %t
-RUN: echo eight >> %t
-RUN: echo nine >> %t
-RUN: echo ten >> %t
-
-RUN: llvm-strings %t | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}}
-RUN: llvm-strings -t d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}}
-RUN: llvm-strings -t o %t | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace --implicit-check-not={{.}}
-RUN: llvm-strings -t x %t | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace --implicit-check-not={{.}}
+RUN: split-file --no-leading-lines %s %t
+#--- a.txt
+one
+two
+three
+four
+five
+six
+seven
+eight
+nine
+ten
+#--- end
+
+RUN: llvm-strings %t/a.txt | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}}
+RUN: llvm-strings -t d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}}
+RUN: llvm-strings -t o %t/a.txt | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace --implicit-check-not={{.}}
+RUN: llvm-strings -t x %t/a.txt | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace --implicit-check-not={{.}}
## Show --radix works too.
-RUN: llvm-strings --radix d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
-RUN: llvm-strings --radix o %t | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace
-RUN: llvm-strings --radix x %t | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace
+RUN: llvm-strings --radix d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
+RUN: llvm-strings --radix o %t/a.txt | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace
+RUN: llvm-strings --radix x %t/a.txt | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace
## Show different syntaxes work.
-RUN: llvm-strings --radix=d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
-RUN: llvm-strings -t=d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
+RUN: llvm-strings --radix=d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
+RUN: llvm-strings -t=d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace
CHECK-NONE: {{^}}three
CHECK-NONE: {{^}}four
@@ -54,5 +57,5 @@ CHECK-HEX: {{^}} 22 eight
CHECK-HEX: {{^}} 28 nine
## Show that an invalid value is rejected.
-RUN: not llvm-strings --radix z %t 2>&1 | FileCheck %s --check-prefix=INVALID
+RUN: not llvm-strings --radix z %t/a.txt 2>&1 | FileCheck %s --check-prefix=INVALID
INVALID: llvm-strings{{.*}}: for the --radix option: Cannot find option named 'z'!
diff --git a/llvm/test/tools/split-file/Inputs/basic-aa.txt b/llvm/test/tools/split-file/Inputs/basic-aa.txt
new file mode 100644
index 000000000000..0b9ddeb2fc12
--- /dev/null
+++ b/llvm/test/tools/split-file/Inputs/basic-aa.txt
@@ -0,0 +1,2 @@
+
+aa
diff --git a/llvm/test/tools/split-file/Inputs/basic-bb.txt b/llvm/test/tools/split-file/Inputs/basic-bb.txt
new file mode 100644
index 000000000000..0f20b8cf755b
--- /dev/null
+++ b/llvm/test/tools/split-file/Inputs/basic-bb.txt
@@ -0,0 +1,6 @@
+
+
+
+; Comments are preserved.
+bb
+
diff --git a/llvm/test/tools/split-file/Inputs/basic-cc.txt b/llvm/test/tools/split-file/Inputs/basic-cc.txt
new file mode 100644
index 000000000000..dc815bf4b7dc
--- /dev/null
+++ b/llvm/test/tools/split-file/Inputs/basic-cc.txt
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+cc
diff --git a/llvm/test/tools/split-file/basic.test b/llvm/test/tools/split-file/basic.test
new file mode 100644
index 000000000000..5d32c3429ed3
--- /dev/null
+++ b/llvm/test/tools/split-file/basic.test
@@ -0,0 +1,40 @@
+#--- aa
+aa
+;--- bb
+; Comments are preserved.
+bb
+
+//--- subdir/cc
+cc
+//--- end
+
+# RUN: rm -rf %t
+# RUN: split-file %s %t
+# RUN: diff %S/Inputs/basic-aa.txt %t/aa
+# RUN: diff %S/Inputs/basic-bb.txt %t/bb
+# RUN: diff %S/Inputs/basic-cc.txt %t/subdir/cc
+# RUN: FileCheck %s --check-prefix=END < %t/end
+
+## Can be called on a non-empty directory.
+# RUN: split-file %s %t
+# RUN: diff %S/Inputs/basic-aa.txt %t/aa
+
+## Test that we will delete the output if it is a file, so that we can create
+## a directory.
+# RUN: rm -rf %t && touch %t
+# RUN: split-file %s %t
+# RUN: diff %S/Inputs/basic-aa.txt %t/aa
+
+# END: RUN: split-file %s %t
+
+# RUN: not %split-file 2>&1 | FileCheck %s --check-prefix=NO_INPUT
+
+# NO_INPUT: split-file: error: input filename is not specified
+
+# RUN: not %split-file %s '' 2>&1 | FileCheck %s --check-prefix=NO_OUTPUT
+
+# NO_OUTPUT: split-file: error: output directory is not specified
+
+# RUN: not %split-file %S/Inputs/basic-aa.txt %t 2>&1 | FileCheck %s --check-prefix=NOT_EXIST
+
+# NOT_EXIST: split-file: error: {{.*}}.txt: no part separator was found
diff --git a/llvm/test/tools/split-file/empty.test b/llvm/test/tools/split-file/empty.test
new file mode 100644
index 000000000000..e76bea93bc2e
--- /dev/null
+++ b/llvm/test/tools/split-file/empty.test
@@ -0,0 +1,4 @@
+# RUN: split-file --no-leading-lines %s %t
+# RUN: count 0 < %t/empty
+
+#--- empty
diff --git a/llvm/test/tools/split-file/error.test b/llvm/test/tools/split-file/error.test
new file mode 100644
index 000000000000..9efa5adca49a
--- /dev/null
+++ b/llvm/test/tools/split-file/error.test
@@ -0,0 +1,16 @@
+# RUN: not %split-file %s %t 2>&1 | FileCheck %s
+# RUN: not ls %t/dup
+
+# CHECK: {{.*}}.test:[[#@LINE+1]]: error: empty part name
+//---
+
+# CHECK: {{.*}}.test:[[#@LINE+1]]: error: part name cannot have leading or trailing space
+//--- leading_space
+
+# CHECK: {{.*}}.test:[[#@LINE+1]]: error: part name cannot have leading or trailing space
+//--- trailing_space
+
+;--- dup
+
+# CHECK: {{.*}}.test:[[#@LINE+1]]: error: ';--- dup' occurs more than once
+;--- dup
diff --git a/llvm/test/tools/split-file/help.test b/llvm/test/tools/split-file/help.test
new file mode 100644
index 000000000000..27c450aeac3a
--- /dev/null
+++ b/llvm/test/tools/split-file/help.test
@@ -0,0 +1,6 @@
+RUN: split-file --help 2>&1 | FileCheck --implicit-check-not='General Options:' %s
+CHECK: OVERVIEW: Split input {{.*}}
+CHECK: USAGE: split-file [options] filename directory
+CHECK: Generic Options:
+CHECK: split-file Options:
+CHECK: --no-leading-lines
diff --git a/llvm/test/tools/split-file/no-leading-lines.test b/llvm/test/tools/split-file/no-leading-lines.test
new file mode 100644
index 000000000000..d4de34f33d1e
--- /dev/null
+++ b/llvm/test/tools/split-file/no-leading-lines.test
@@ -0,0 +1,10 @@
+## With --no-leading-lines, don't add leading lines (which is used to preserve line numbers).
+
+# RUN: split-file --no-leading-lines %s %t
+# RUN: count 1 < %t/a.txt
+# RUN: FileCheck %s < %t/a.txt
+
+# CHECK: input
+
+#--- a.txt
+input
diff --git a/llvm/test/tools/split-file/output-is-special.test b/llvm/test/tools/split-file/output-is-special.test
new file mode 100644
index 000000000000..98bb4d36a4ff
--- /dev/null
+++ b/llvm/test/tools/split-file/output-is-special.test
@@ -0,0 +1,8 @@
+# UNSUPPORTED: system-windows
+# REQUIRES: shell
+
+## Don't delete the output if it is special, otherwise root may accidentally
+## remove important special files.
+# RUN: not split-file %s /dev/null 2>&1 | FileCheck %s
+
+# CHECK: error: /dev/null: output cannot be a special file
diff --git a/llvm/tools/split-file/.clang-tidy b/llvm/tools/split-file/.clang-tidy
new file mode 100644
index 000000000000..87ec2ff53af6
--- /dev/null
+++ b/llvm/tools/split-file/.clang-tidy
@@ -0,0 +1,19 @@
+# Almost identical to the top-level .clang-tidy, except that {Member,Parameter,Variable}Case use camelBack.
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming'
+CheckOptions:
+ - key: readability-identifier-naming.ClassCase
+ value: CamelCase
+ - key: readability-identifier-naming.EnumCase
+ value: CamelCase
+ - key: readability-identifier-naming.FunctionCase
+ value: camelBack
+ - key: readability-identifier-naming.MemberCase
+ value: camelBack
+ - key: readability-identifier-naming.ParameterCase
+ value: camelBack
+ - key: readability-identifier-naming.UnionCase
+ value: CamelCase
+ - key: readability-identifier-naming.VariableCase
+ value: camelBack
+ - key: readability-identifier-naming.IgnoreMainLikeFunctions
+ value: 1
diff --git a/llvm/tools/split-file/CMakeLists.txt b/llvm/tools/split-file/CMakeLists.txt
new file mode 100644
index 000000000000..ba998483c22a
--- /dev/null
+++ b/llvm/tools/split-file/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ )
+
+add_llvm_tool(split-file
+ split-file.cpp
+ )
diff --git a/llvm/tools/split-file/split-file.cpp b/llvm/tools/split-file/split-file.cpp
new file mode 100644
index 000000000000..772a19164dc4
--- /dev/null
+++ b/llvm/tools/split-file/split-file.cpp
@@ -0,0 +1,172 @@
+//===- split-file.cpp - Input splitting utility ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
+// the specified part.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
+#include <string>
+#include <system_error>
+
+using namespace llvm;
+
+static cl::OptionCategory cat("split-file Options");
+
+static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
+ cl::cat(cat));
+
+static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
+ cl::value_desc("directory"), cl::cat(cat));
+
+static cl::opt<bool> noLeadingLines("no-leading-lines",
+ cl::desc("Don't preserve line numbers"),
+ cl::cat(cat));
+
+static StringRef toolName;
+static int errorCount;
+
+LLVM_ATTRIBUTE_NORETURN static void fatal(StringRef filename,
+ const Twine &message) {
+ if (filename.empty())
+ WithColor::error(errs(), toolName) << message << '\n';
+ else
+ WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
+ exit(1);
+}
+
+static void error(StringRef filename, int64_t line, const Twine &message) {
+ ++errorCount;
+ errs() << filename << ':' << line << ": ";
+ WithColor::error(errs()) << message << '\n';
+}
+
+namespace {
+struct Part {
+ const char *begin = nullptr;
+ const char *end = nullptr;
+ int64_t leadingLines = 0;
+};
+} // namespace
+
+static int handle(MemoryBuffer &inputBuf, StringRef input) {
+ DenseMap<StringRef, Part> partToBegin;
+ StringRef lastPart, separator;
+ for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
+ const int64_t lineNo = i.line_number();
+ const StringRef line = *i++;
+ const size_t markerLen = line.startswith("//") ? 6 : 5;
+ if (!(line.size() >= markerLen &&
+ line.substr(markerLen - 4).startswith("--- ")))
+ continue;
+ separator = line.substr(0, markerLen);
+ const StringRef partName = line.substr(markerLen);
+ if (partName.empty()) {
+ error(input, lineNo, "empty part name");
+ continue;
+ }
+ if (isSpace(partName.front()) || isSpace(partName.back())) {
+ error(input, lineNo, "part name cannot have leading or trailing space");
+ continue;
+ }
+
+ auto res = partToBegin.try_emplace(partName);
+ if (!res.second) {
+ error(input, lineNo,
+ "'" + separator + partName + "' occurs more than once");
+ continue;
+ }
+ if (!lastPart.empty())
+ partToBegin[lastPart].end = line.data();
+ Part &cur = res.first->second;
+ if (!i.is_at_eof())
+ cur.begin = i->data();
+ // If --no-leading-lines is specified, leadingLines is 0. Otherwise, prepend
+ // newlines so that the extracted part preserves line numbers.
+ cur.leadingLines = noLeadingLines ? 0 : i.line_number() - 1;
+
+ lastPart = partName;
+ }
+ if (lastPart.empty())
+ fatal(input, "no part separator was found");
+ if (errorCount)
+ return 1;
+ partToBegin[lastPart].end = inputBuf.getBufferEnd();
+
+ std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
+ SmallString<256> partPath;
+ for (auto &keyValue : partToBegin) {
+ partPath.clear();
+ sys::path::append(partPath, output, keyValue.first);
+ std::error_code ec =
+ sys::fs::create_directories(sys::path::parent_path(partPath));
+ if (ec)
+ fatal(input, ec.message());
+ auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
+ llvm::sys::fs::OF_None);
+ if (!f)
+ fatal(input, ec.message());
+
+ Part &part = keyValue.second;
+ for (int64_t i = 0; i != part.leadingLines; ++i)
+ (*f).os().write('\n');
+ if (part.begin)
+ (*f).os().write(part.begin, part.end - part.begin);
+ outputFiles.push_back(std::move(f));
+ }
+
+ for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
+ outputFile->keep();
+ return 0;
+}
+
+int main(int argc, const char **argv) {
+ toolName = sys::path::stem(argv[0]);
+ cl::HideUnrelatedOptions({&cat});
+ cl::ParseCommandLineOptions(
+ argc, argv,
+ "Split input into multiple parts separated by regex '^(.|//)--- ' and "
+ "extract the part specified by '^(.|//)--- <part>'\n",
+ nullptr,
+ /*EnvVar=*/nullptr,
+ /*LongOptionsUseDoubleDash=*/true);
+
+ if (input.empty())
+ fatal("", "input filename is not specified");
+ if (output.empty())
+ fatal("", "output directory is not specified");
+ ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
+ MemoryBuffer::getFileOrSTDIN(input);
+ if (std::error_code ec = bufferOrErr.getError())
+ fatal(input, ec.message());
+
+ // Delete output if it is a file or an empty directory, so that we can create
+ // a directory.
+ sys::fs::file_status status;
+ if (std::error_code ec = sys::fs::status(output, status))
+ if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
+ fatal(output, ec.message());
+ if (status.type() != sys::fs::file_type::file_not_found &&
+ status.type() != sys::fs::file_type::directory_file &&
+ status.type() != sys::fs::file_type::regular_file)
+ fatal(output, "output cannot be a special file");
+ if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
+ if (ec.value() != static_cast<int>(std::errc::directory_not_empty))
+ fatal(output, ec.message());
+ return handle(**bufferOrErr, input);
+}
diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn
index 581cc5482578..bfb63a39ba65 100644
--- a/llvm/utils/gn/secondary/lld/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn
@@ -94,6 +94,7 @@ group("test") {
"//llvm/tools/llvm-readobj:symlinks",
"//llvm/tools/obj2yaml",
"//llvm/tools/opt",
+ "//llvm/tools/split-file",
"//llvm/tools/yaml2obj",
"//llvm/utils/FileCheck",
"//llvm/utils/count",
diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
index 2c4a23ffbaac..c714d9b5ba7b 100644
--- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
@@ -260,6 +260,7 @@ group("test") {
"//llvm/tools/opt",
"//llvm/tools/sancov",
"//llvm/tools/sanstats",
+ "//llvm/tools/split-file",
"//llvm/tools/verify-uselistorder",
"//llvm/tools/yaml2obj",
"//llvm/unittests",
diff --git a/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn
new file mode 100644
index 000000000000..4bf9269c3c38
--- /dev/null
+++ b/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn
@@ -0,0 +1,4 @@
+executable("split-file") {
+ deps = [ "//llvm/lib/Support" ]
+ sources = [ "split-file.cpp" ]
+}
More information about the llvm-commits
mailing list