[lld] d054c7e - Add test utility 'extract'
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 23 19:15:50 PDT 2020
Author: Fangrui Song
Date: 2020-07-23T19:15:35-07:00
New Revision: d054c7ee2e9f4f98af7f22a5b00a941eb919bd59
URL: https://github.com/llvm/llvm-project/commit/d054c7ee2e9f4f98af7f22a5b00a941eb919bd59
DIFF: https://github.com/llvm/llvm-project/commit/d054c7ee2e9f4f98af7f22a5b00a941eb919bd59.diff
LOG: Add test utility 'extract'
See https://lists.llvm.org/pipermail/llvm-dev/2020-July/143373.html
"[llvm-dev] Multiple documents in one test file" for some discussions.
`extract part filename` splits the input file into multiple parts separated by
regex `^(.|//)--- ` and extracts the specified part to stdout or the
output file (if specified).
Use case A (organizing input of different formats (e.g. linker
script+assembly) in one file).
```
// RUN: extract lds %s -o %t.lds
// RUN: extract asm %s -o %t.s
// RUN: llvm-mc %t.s -o %t.o
// RUN: ld.lld -T %t.lds %t.o -o %t
This is sometimes better than the %S/Inputs/ approach because the user
can see the auxiliary files immediately and doesn't have to open another file.
```
Use case B (for utilities which don't have built-in input splitting
feature):
```
// RUN: extract case1 %s | llc | FileCheck %s --check-prefix=CASE1
// RUN: extract case2 %s | llc | FileCheck %s --check-prefix=CASE2
Combining tests prudently can improve readability.
This is sometimes better than having multiple test files.
```
Since this is a new utility, there are no git history concerns for
UpperCase variable names. I use lowerCase variable names like mlir/lld.
Reviewed By: jhenderson
Differential Revision: https://reviews.llvm.org/D83834
Added:
llvm/test/tools/extract/Inputs/basic-aa.txt
llvm/test/tools/extract/Inputs/basic-bb.txt
llvm/test/tools/extract/basic.test
llvm/test/tools/extract/help.test
llvm/test/tools/extract/no-leading-lines.test
llvm/tools/extract/.clang-tidy
llvm/tools/extract/CMakeLists.txt
llvm/tools/extract/extract.cpp
Modified:
lld/test/CMakeLists.txt
lld/test/ELF/linkerscript/noload.s
lld/test/lit.cfg.py
llvm/docs/TestingGuide.rst
llvm/test/CMakeLists.txt
llvm/test/lit.cfg.py
llvm/test/tools/gold/X86/multiple-sections.ll
llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test
llvm/test/tools/llvm-strings/radix.test
Removed:
################################################################################
diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt
index 4fbd2534b5a9..7831bb1a8de0 100644
--- a/lld/test/CMakeLists.txt
+++ b/lld/test/CMakeLists.txt
@@ -34,7 +34,7 @@ configure_lit_site_cfg(
set(LLD_TEST_DEPS lld)
if (NOT LLD_BUILT_STANDALONE)
list(APPEND LLD_TEST_DEPS
- FileCheck count llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
+ FileCheck count extract llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
llvm-dis llvm-dwarfdump llvm-lib llvm-lipo llvm-mc llvm-nm llvm-objcopy
llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj llvm-strip not obj2yaml
opt yaml2obj
diff --git a/lld/test/ELF/linkerscript/noload.s b/lld/test/ELF/linkerscript/noload.s
index 2f52b465854e..c2014722985d 100644
--- a/lld/test/ELF/linkerscript/noload.s
+++ b/lld/test/ELF/linkerscript/noload.s
@@ -1,11 +1,7 @@
# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
-# RUN: echo "SECTIONS { \
-# RUN: .data_noload_a (NOLOAD) : { *(.data_noload_a) } \
-# RUN: .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) } \
-# RUN: .no_input_sec_noload (NOLOAD) : { . += 1; } \
-# RUN: .text (0x20000) : { *(.text) } };" > %t.script
-# RUN: ld.lld -o %t --script %t.script %t.o
+# RUN: extract asm %s -o %t.s && extract lds %s -o %t.lds
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t.s -o %t.o
+# RUN: ld.lld -o %t --script %t.lds %t.o
# RUN: llvm-readelf -S -l %t | FileCheck %s
# CHECK: Name Type Address Off Size
@@ -16,6 +12,7 @@
# CHECK: Type Offset VirtAddr PhysAddr
# CHECK-NEXT: LOAD 0x001000 0x0000000000020000 0x0000000000020000
+#--- asm
.section .text,"ax",@progbits
nop
@@ -24,3 +21,11 @@
.section .data_noload_b,"aw",@progbits
.zero 4096
+
+#--- lds
+SECTIONS {
+ .data_noload_a (NOLOAD) : { *(.data_noload_a) }
+ .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) }
+ .no_input_sec_noload (NOLOAD) : { . += 1; }
+ .text (0x20000) : { *(.text) }
+}
diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py
index 267f8c517858..0fa9b48c3c79 100644
--- a/lld/test/lit.cfg.py
+++ b/lld/test/lit.cfg.py
@@ -39,9 +39,9 @@
llvm_config.use_lld()
tool_patterns = [
- 'llc', 'llvm-as', 'llvm-mc', 'llvm-nm', 'llvm-objdump', 'llvm-pdbutil',
- 'llvm-dwarfdump', 'llvm-readelf', 'llvm-readobj', 'obj2yaml', 'yaml2obj',
- 'opt', 'llvm-dis']
+ 'extract', 'llc', 'llvm-as', 'llvm-mc', 'llvm-nm', 'llvm-objdump',
+ 'llvm-pdbutil', 'llvm-dwarfdump', 'llvm-readelf', 'llvm-readobj',
+ 'obj2yaml', 'yaml2obj', 'opt', 'llvm-dis']
llvm_config.add_tool_substitutions(tool_patterns)
@@ -87,7 +87,7 @@
# Indirectly check if the mt.exe Microsoft utility exists by searching for
# cvtres, which always accompanies it. Alternatively, check if we can use
# libxml2 to merge manifests.
-if (lit.util.which('cvtres', config.environment['PATH']) or
+if (lit.util.which('cvtres', config.environment['PATH']) or
config.llvm_libxml2_enabled):
config.available_features.add('manifest_tool')
diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 2e937f000627..6fd9ab2d24ca 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -271,8 +271,27 @@ adding your code there instead of creating a new file.
Extra files
-----------
-If your test requires extra files besides the file containing the ``RUN:``
-lines, the idiomatic place to put them is in a subdirectory ``Inputs``.
+If your test requires extra files besides the file containing the ``RUN:`` lines
+and the extra files are small, consider specifying them in the same file and
+using ``extract`` to extract them. For example,
+
+.. code-block:: llvm
+
+ ; RUN: extract b %s -o %tb.ll
+ ; RUN: extract a %s | llvm-link - %tb.ll -S | FileCheck %s
+
+ ; CHECK: ...
+
+ ;--- a
+ ...
+ ;--- b
+ ...
+
+The parts are separated by the regex ``^(.|//)--- <part>``. By default the
+extracted content has leading empty lines to preserve line numbers. Specify
+``--no-leading-lines`` to drop leading lines.
+
+If the extra files are large, the idiomatic place to put them is in a subdirectory ``Inputs``.
You can then refer to the extra files as ``%S/Inputs/foo.bar``.
For example, consider ``test/Linker/ident.ll``. The directory structure is
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 6994c29efa9a..529c06c82b24 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -52,6 +52,7 @@ set(LLVM_TEST_DEPENDS
UnitTests
bugpoint
count
+ extract
llc
lli
lli-child-target
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 0a3289fcc4ad..49bd8ddfb2dc 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -130,6 +130,7 @@ def get_asan_rtlib():
config.llvm_locstats_used = os.path.exists(llvm_locstats_tool)
tools = [
+ ToolSubst('%extract', FindTool('extract')),
ToolSubst('%lli', FindTool('lli'), post='.', extra_args=lli_args),
ToolSubst('%llc_dwarf', FindTool('llc'), extra_args=llc_args),
ToolSubst('%go', config.go_executable, unresolved='ignore'),
diff --git a/llvm/test/tools/extract/Inputs/basic-aa.txt b/llvm/test/tools/extract/Inputs/basic-aa.txt
new file mode 100644
index 000000000000..9eac3fdccbee
--- /dev/null
+++ b/llvm/test/tools/extract/Inputs/basic-aa.txt
@@ -0,0 +1,6 @@
+
+
+
+aa
+; BB-NOT: {{.}}
+; BB: {{^}}bb{{$}}
diff --git a/llvm/test/tools/extract/Inputs/basic-bb.txt b/llvm/test/tools/extract/Inputs/basic-bb.txt
new file mode 100644
index 000000000000..de17efab6fb6
--- /dev/null
+++ b/llvm/test/tools/extract/Inputs/basic-bb.txt
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+bb
+
+// CC: // Comments are preserved.
diff --git a/llvm/test/tools/extract/basic.test b/llvm/test/tools/extract/basic.test
new file mode 100644
index 000000000000..9f9413106cc7
--- /dev/null
+++ b/llvm/test/tools/extract/basic.test
@@ -0,0 +1,32 @@
+# AA-NOT: {{.}}
+# AA: {{^}}aa{{$}}
+#--- aa
+aa
+; BB-NOT: {{.}}
+; BB: {{^}}bb{{$}}
+;--- bb
+bb
+
+// CC: // Comments are preserved.
+//--- cc
+cc
+// Comments are preserved.
+;--- dup
+;--- dup
+
+# RUN: extract aa %s | diff %S/Inputs/basic-aa.txt -
+# RUN: extract bb - < %s | diff %S/Inputs/basic-bb.txt -
+# RUN: extract cc %s -o %t
+# RUN: FileCheck %s --check-prefix=CC < %t
+
+# RUN: not %extract aa 2>&1 | FileCheck %s --check-prefix=NO_INPUT
+
+# NO_INPUT: extract: error: input filename is not specified
+
+# RUN: not %extract dup %s 2>&1 | FileCheck %s --check-prefix=DUP
+
+# DUP: extract: error: {{.*}}.test: ';--- dup' occurs more than once
+
+# RUN: not %extract not_exist %s 2>&1 | FileCheck %s --check-prefix=NOT_EXIST
+
+# NOT_EXIST: extract: error: {{.*}}.test: ';--- not_exist' was not found
diff --git a/llvm/test/tools/extract/help.test b/llvm/test/tools/extract/help.test
new file mode 100644
index 000000000000..282052869116
--- /dev/null
+++ b/llvm/test/tools/extract/help.test
@@ -0,0 +1,5 @@
+RUN: extract --help 2>&1 | FileCheck --implicit-check-not='General Options:' %s
+CHECK: OVERVIEW: Split input {{.*}}
+CHECK: Generic Options:
+CHECK: extract Options:
+CHECK: -o
diff --git a/llvm/test/tools/extract/no-leading-lines.test b/llvm/test/tools/extract/no-leading-lines.test
new file mode 100644
index 000000000000..f0efff5475af
--- /dev/null
+++ b/llvm/test/tools/extract/no-leading-lines.test
@@ -0,0 +1,10 @@
+## With --no-leading-lines, don't add leading lines (which is used to preserve line numbers).
+
+# RUN: extract --no-leading-lines input %s -o %t
+# RUN: count 1 < %t
+# RUN: FileCheck %s < %t
+
+# CHECK: input
+
+#--- input
+input
diff --git a/llvm/test/tools/gold/X86/multiple-sections.ll b/llvm/test/tools/gold/X86/multiple-sections.ll
index facbd8d992ed..31a89a9d3b48 100644
--- a/llvm/test/tools/gold/X86/multiple-sections.ll
+++ b/llvm/test/tools/gold/X86/multiple-sections.ll
@@ -1,10 +1,8 @@
-; RUN: echo ".text.tin" > %t_order_lto.txt
-; RUN: echo ".text._start" >> %t_order_lto.txt
-; RUN: echo ".text.pat" >> %t_order_lto.txt
-; RUN: llvm-as %s -o %t.o
+; RUN: extract order %s -o %t.order
+; RUN: extract ir %s | llvm-as -o %t.o
; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: -m elf_x86_64 -o %t.exe %t.o \
-; RUN: --section-ordering-file=%t_order_lto.txt
+; RUN: --section-ordering-file=%t.order
; RUN: llvm-readelf -s %t.exe | FileCheck %s
; Check that the order of the sections is tin -> _start -> pat.
@@ -13,6 +11,12 @@
; CHECK: 00000000004000b0 1 FUNC LOCAL DEFAULT 1 tin
; CHECK: 00000000004000c0 15 FUNC GLOBAL DEFAULT 1 _start
+;--- order
+.text.tin
+.text._start
+.text.pat
+
+;--- ir
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test b/llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test
index 78de46cc47b5..ad71e81eab83 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test
@@ -1,19 +1,24 @@
-# RUN: yaml2obj %s -o %t
+# RUN: extract yaml %s | yaml2obj - -o %t
# RUN: llvm-objcopy --strip-symbol baz -N bar %t %t2
# RUN: llvm-readobj --symbols --sections %t2 | FileCheck %s
# RUN: llvm-strip --strip-symbol baz -N bar %t -o %t3
# RUN: cmp %t2 %t3
# RUN: llvm-strip --regex --strip-symbol '^b.*' -N bar %t -o %t4
# RUN: cmp %t3 %t4
-# RUN: echo " bar # bar" > %t-list.txt
-# RUN: echo " baz # baz" >> %t-list.txt
-# RUN: echo " # no symbol" >> %t-list.txt
-# RUN: llvm-objcopy --strip-symbols %t-list.txt %t %t5
+# RUN: extract list1 %s -o %t-list.txt && llvm-objcopy --strip-symbols %t-list.txt %t %t5
# RUN: cmp %t3 %t5
-# RUN: echo "b.* # bar & baz" > %t-list2.txt
-# RUN: llvm-objcopy --regex --strip-symbols %t-list2.txt %t %t6
+# RUN: extract list2 %s -o %t-list2.txt && llvm-objcopy --regex --strip-symbols %t-list2.txt %t %t6
# RUN: cmp %t3 %t6
+#--- list1
+bar # bar
+baz # baz
+# no symbol
+
+#--- list2
+b.* # bar & baz
+
+#--- yaml
!ELF
FileHeader:
Class: ELFCLASS64
diff --git a/llvm/test/tools/llvm-strings/radix.test b/llvm/test/tools/llvm-strings/radix.test
index d23fb3cddc8f..d9796a937d90 100644
--- a/llvm/test/tools/llvm-strings/radix.test
+++ b/llvm/test/tools/llvm-strings/radix.test
@@ -1,15 +1,18 @@
## Show that llvm-strings can handle the -t/--radix switch properly.
-RUN: echo one > %t
-RUN: echo two >> %t
-RUN: echo three >> %t
-RUN: echo four >> %t
-RUN: echo five >> %t
-RUN: echo six >> %t
-RUN: echo seven >> %t
-RUN: echo eight >> %t
-RUN: echo nine >> %t
-RUN: echo ten >> %t
+RUN: extract --no-leading-lines input %s -o %t
+#--- input
+one
+two
+three
+four
+five
+six
+seven
+eight
+nine
+ten
+#--- end
RUN: llvm-strings %t | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}}
RUN: llvm-strings -t d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}}
diff --git a/llvm/tools/extract/.clang-tidy b/llvm/tools/extract/.clang-tidy
new file mode 100644
index 000000000000..87ec2ff53af6
--- /dev/null
+++ b/llvm/tools/extract/.clang-tidy
@@ -0,0 +1,19 @@
+# Almost identical to the top-level .clang-tidy, except that {Member,Parameter,Variable}Case use camelBack.
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming'
+CheckOptions:
+ - key: readability-identifier-naming.ClassCase
+ value: CamelCase
+ - key: readability-identifier-naming.EnumCase
+ value: CamelCase
+ - key: readability-identifier-naming.FunctionCase
+ value: camelBack
+ - key: readability-identifier-naming.MemberCase
+ value: camelBack
+ - key: readability-identifier-naming.ParameterCase
+ value: camelBack
+ - key: readability-identifier-naming.UnionCase
+ value: CamelCase
+ - key: readability-identifier-naming.VariableCase
+ value: camelBack
+ - key: readability-identifier-naming.IgnoreMainLikeFunctions
+ value: 1
diff --git a/llvm/tools/extract/CMakeLists.txt b/llvm/tools/extract/CMakeLists.txt
new file mode 100644
index 000000000000..dae1f463f066
--- /dev/null
+++ b/llvm/tools/extract/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ )
+
+add_llvm_tool(extract
+ extract.cpp
+ )
diff --git a/llvm/tools/extract/extract.cpp b/llvm/tools/extract/extract.cpp
new file mode 100644
index 000000000000..8ccb53915614
--- /dev/null
+++ b/llvm/tools/extract/extract.cpp
@@ -0,0 +1,113 @@
+//===- extract.cpp - Input splitting utility ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
+// the specified part.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/WithColor.h"
+#include <string>
+
+using namespace llvm;
+
+static cl::OptionCategory cat("extract Options");
+
+static cl::opt<std::string> part(cl::Positional, cl::desc("part"),
+ cl::cat(cat));
+
+static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
+ cl::cat(cat));
+
+static cl::opt<std::string> output("o", cl::desc("Output filename"),
+ cl::value_desc("filename"), cl::init("-"),
+ cl::cat(cat));
+
+static cl::opt<bool> noLeadingLines("no-leading-lines",
+ cl::desc("Don't preserve line numbers"),
+ cl::cat(cat));
+
+static StringRef toolName;
+
+LLVM_ATTRIBUTE_NORETURN static void error(StringRef filename,
+ const Twine &message) {
+ if (filename.empty())
+ WithColor::error(errs(), toolName) << message << '\n';
+ else
+ WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
+ exit(1);
+}
+
+static void handle(MemoryBuffer &inputBuf, StringRef input) {
+ const char *partBegin = nullptr, *partEnd = nullptr;
+ int numEmptyLines = 0;
+ StringRef separator;
+ for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
+ StringRef line = *i++;
+ size_t markerLen = line.startswith("//") ? 6 : 5;
+ if (!(line.size() > markerLen &&
+ line.substr(markerLen - 4).startswith("--- ")))
+ continue;
+ separator = line.substr(0, markerLen);
+ StringRef cur = line.substr(markerLen);
+ if (cur == part) {
+ if (partBegin)
+ error(input, "'" + separator + cur + "' occurs more than once");
+ if (!noLeadingLines)
+ numEmptyLines = i.line_number() - 1;
+ if (i.is_at_eof())
+ break;
+ partBegin = i->data();
+ } else if (partBegin && !partEnd) {
+ partEnd = line.data();
+ }
+ }
+ if (!partBegin)
+ error(input, "'" + separator + part + "' was not found");
+ if (!partEnd)
+ partEnd = inputBuf.getBufferEnd();
+
+ Expected<std::unique_ptr<FileOutputBuffer>> outputBuf =
+ FileOutputBuffer::create(output, numEmptyLines + (partEnd - partBegin));
+ if (!outputBuf)
+ error(input, toString(outputBuf.takeError()));
+ uint8_t *buf = (*outputBuf)->getBufferStart();
+
+ // If --no-leading-lines is specified, numEmptyLines is 0. Otherwise, prepend
+ // newlines so that the extracted part preserves line numbers.
+ std::fill_n(buf, numEmptyLines, '\n');
+ std::copy(partBegin, partEnd, buf + numEmptyLines);
+ if (Error e = (*outputBuf)->commit())
+ error(input, toString(std::move(e)));
+}
+
+int main(int argc, const char **argv) {
+ toolName = sys::path::stem(argv[0]);
+ cl::HideUnrelatedOptions({&cat});
+ cl::ParseCommandLineOptions(
+ argc, argv,
+ "Split input into multiple parts separated by regex '^(.|//)--- ' and "
+ "extract the part specified by '^(.|//)--- <part>'\n",
+ nullptr,
+ /*EnvVar=*/nullptr,
+ /*LongOptionsUseDoubleDash=*/true);
+
+ if (input.empty())
+ error("", "input filename is not specified");
+ ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
+ MemoryBuffer::getFileOrSTDIN(input);
+ if (std::error_code ec = bufferOrErr.getError())
+ error(input, ec.message());
+ handle(**bufferOrErr, input);
+}
More information about the llvm-commits
mailing list