[lld] d054c7e - Add test utility 'extract'

Thu Jul 23 19:15:50 PDT 2020

Author: Fangrui Song
Date: 2020-07-23T19:15:35-07:00
New Revision: d054c7ee2e9f4f98af7f22a5b00a941eb919bd59

URL: https://github.com/llvm/llvm-project/commit/d054c7ee2e9f4f98af7f22a5b00a941eb919bd59
DIFF: https://github.com/llvm/llvm-project/commit/d054c7ee2e9f4f98af7f22a5b00a941eb919bd59.diff

LOG: Add test utility 'extract'

See https://lists.llvm.org/pipermail/llvm-dev/2020-July/143373.html
"[llvm-dev] Multiple documents in one test file" for some discussions.

`extract part filename` splits the input file into multiple parts separated by
regex `^(.|//)--- ` and extracts the specified part to stdout or the
output file (if specified).

Use case A (organizing input of different formats (e.g. linker
script+assembly) in one file).

```
// RUN: extract lds %s -o %t.lds
// RUN: extract asm %s -o %t.s
// RUN: llvm-mc %t.s -o %t.o
// RUN: ld.lld -T %t.lds %t.o -o %t
This is sometimes better than the %S/Inputs/ approach because the user
can see the auxiliary files immediately and doesn't have to open another file.
```

Use case B (for utilities which don't have built-in input splitting
feature):

```
// RUN: extract case1 %s | llc | FileCheck %s --check-prefix=CASE1
// RUN: extract case2 %s | llc | FileCheck %s --check-prefix=CASE2
Combining tests prudently can improve readability.
This is sometimes better than having multiple test files.
```

Since this is a new utility, there are no git history concerns for
UpperCase variable names. I use lowerCase variable names like mlir/lld.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D83834

Added: 
    llvm/test/tools/extract/Inputs/basic-aa.txt
    llvm/test/tools/extract/Inputs/basic-bb.txt
    llvm/test/tools/extract/basic.test
    llvm/test/tools/extract/help.test
    llvm/test/tools/extract/no-leading-lines.test
    llvm/tools/extract/.clang-tidy
    llvm/tools/extract/CMakeLists.txt
    llvm/tools/extract/extract.cpp

Modified: 
    lld/test/CMakeLists.txt
    lld/test/ELF/linkerscript/noload.s
    lld/test/lit.cfg.py
    llvm/docs/TestingGuide.rst
    llvm/test/CMakeLists.txt
    llvm/test/lit.cfg.py
    llvm/test/tools/gold/X86/multiple-sections.ll
    llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test
    llvm/test/tools/llvm-strings/radix.test

Removed: 
    


################################################################################
diff  --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt
index 4fbd2534b5a9..7831bb1a8de0 100644

--- a/lld/test/CMakeLists.txt
+++ b/lld/test/CMakeLists.txt
@@ -34,7 +34,7 @@ configure_lit_site_cfg(
 set(LLD_TEST_DEPS lld)
 if (NOT LLD_BUILT_STANDALONE)
   list(APPEND LLD_TEST_DEPS
-    FileCheck count llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
+    FileCheck count extract llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
     llvm-dis llvm-dwarfdump llvm-lib llvm-lipo llvm-mc llvm-nm llvm-objcopy
     llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj llvm-strip not obj2yaml
     opt yaml2obj

diff  --git a/lld/test/ELF/linkerscript/noload.s b/lld/test/ELF/linkerscript/noload.s
index 2f52b465854e..c2014722985d 100644
--- a/lld/test/ELF/linkerscript/noload.s
+++ b/lld/test/ELF/linkerscript/noload.s
@@ -1,11 +1,7 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
-# RUN: echo "SECTIONS { \
-# RUN:        .data_noload_a (NOLOAD) : { *(.data_noload_a) } \
-# RUN:        .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) } \
-# RUN:        .no_input_sec_noload (NOLOAD) : { . += 1; } \
-# RUN:        .text (0x20000) : { *(.text) } };" > %t.script
-# RUN: ld.lld -o %t --script %t.script %t.o
+# RUN: extract asm %s -o %t.s && extract lds %s -o %t.lds
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t.s -o %t.o
+# RUN: ld.lld -o %t --script %t.lds %t.o
 # RUN: llvm-readelf -S -l %t | FileCheck %s
 
 # CHECK:      Name                 Type   Address          Off               Size
@@ -16,6 +12,7 @@
 # CHECK:      Type Offset   VirtAddr           PhysAddr
 # CHECK-NEXT: LOAD 0x001000 0x0000000000020000 0x0000000000020000
 
+#--- asm
 .section .text,"ax",@progbits
   nop
 
@@ -24,3 +21,11 @@
 
 .section .data_noload_b,"aw",@progbits
 .zero 4096
+
+#--- lds
+SECTIONS {
+  .data_noload_a (NOLOAD) : { *(.data_noload_a) }
+  .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) }
+  .no_input_sec_noload (NOLOAD) : { . += 1; }
+  .text (0x20000) : { *(.text) }
+}

diff  --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py
index 267f8c517858..0fa9b48c3c79 100644
--- a/lld/test/lit.cfg.py
+++ b/lld/test/lit.cfg.py
@@ -39,9 +39,9 @@
 llvm_config.use_lld()
 
 tool_patterns = [
-    'llc', 'llvm-as', 'llvm-mc', 'llvm-nm', 'llvm-objdump', 'llvm-pdbutil',
-    'llvm-dwarfdump', 'llvm-readelf', 'llvm-readobj', 'obj2yaml', 'yaml2obj',
-    'opt', 'llvm-dis']
+    'extract', 'llc', 'llvm-as', 'llvm-mc', 'llvm-nm', 'llvm-objdump',
+    'llvm-pdbutil', 'llvm-dwarfdump', 'llvm-readelf', 'llvm-readobj',
+    'obj2yaml', 'yaml2obj', 'opt', 'llvm-dis']
 
 llvm_config.add_tool_substitutions(tool_patterns)
 
@@ -87,7 +87,7 @@
 # Indirectly check if the mt.exe Microsoft utility exists by searching for
 # cvtres, which always accompanies it.  Alternatively, check if we can use
 # libxml2 to merge manifests.
-if (lit.util.which('cvtres', config.environment['PATH']) or 
+if (lit.util.which('cvtres', config.environment['PATH']) or
         config.llvm_libxml2_enabled):
     config.available_features.add('manifest_tool')
 

diff  --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 2e937f000627..6fd9ab2d24ca 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -271,8 +271,27 @@ adding your code there instead of creating a new file.
 Extra files
 -----------
 
-If your test requires extra files besides the file containing the ``RUN:``
-lines, the idiomatic place to put them is in a subdirectory ``Inputs``.
+If your test requires extra files besides the file containing the ``RUN:`` lines
+and the extra files are small, consider specifying them in the same file and
+using ``extract`` to extract them. For example,
+
+.. code-block:: llvm
+
+  ; RUN: extract b %s -o %tb.ll
+  ; RUN: extract a %s | llvm-link - %tb.ll -S | FileCheck %s
+
+  ; CHECK: ...
+
+  ;--- a
+  ...
+  ;--- b
+  ...
+
+The parts are separated by the regex ``^(.|//)--- <part>``. By default the
+extracted content has leading empty lines to preserve line numbers. Specify
+``--no-leading-lines`` to drop leading lines.
+
+If the extra files are large, the idiomatic place to put them is in a subdirectory ``Inputs``.
 You can then refer to the extra files as ``%S/Inputs/foo.bar``.
 
 For example, consider ``test/Linker/ident.ll``. The directory structure is

diff  --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 6994c29efa9a..529c06c82b24 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -52,6 +52,7 @@ set(LLVM_TEST_DEPENDS
           UnitTests
           bugpoint
           count
+          extract
           llc
           lli
           lli-child-target

diff  --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 0a3289fcc4ad..49bd8ddfb2dc 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -130,6 +130,7 @@ def get_asan_rtlib():
 config.llvm_locstats_used = os.path.exists(llvm_locstats_tool)
 
 tools = [
+    ToolSubst('%extract', FindTool('extract')),
     ToolSubst('%lli', FindTool('lli'), post='.', extra_args=lli_args),
     ToolSubst('%llc_dwarf', FindTool('llc'), extra_args=llc_args),
     ToolSubst('%go', config.go_executable, unresolved='ignore'),

diff  --git a/llvm/test/tools/extract/Inputs/basic-aa.txt b/llvm/test/tools/extract/Inputs/basic-aa.txt
new file mode 100644
index 000000000000..9eac3fdccbee
--- /dev/null
+++ b/llvm/test/tools/extract/Inputs/basic-aa.txt
@@ -0,0 +1,6 @@
+
+
+
+aa
+; BB-NOT: {{.}}
+; BB: {{^}}bb{{$}}

diff  --git a/llvm/test/tools/extract/Inputs/basic-bb.txt b/llvm/test/tools/extract/Inputs/basic-bb.txt
new file mode 100644
index 000000000000..de17efab6fb6
--- /dev/null
+++ b/llvm/test/tools/extract/Inputs/basic-bb.txt
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+bb
+
+// CC: // Comments are preserved.

diff  --git a/llvm/test/tools/extract/basic.test b/llvm/test/tools/extract/basic.test
new file mode 100644
index 000000000000..9f9413106cc7
--- /dev/null
+++ b/llvm/test/tools/extract/basic.test
@@ -0,0 +1,32 @@
+# AA-NOT: {{.}}
+# AA: {{^}}aa{{$}}
+#--- aa
+aa
+; BB-NOT: {{.}}
+; BB: {{^}}bb{{$}}
+;--- bb
+bb
+
+// CC: // Comments are preserved.
+//--- cc
+cc
+// Comments are preserved.
+;--- dup
+;--- dup
+
+# RUN: extract aa %s | diff %S/Inputs/basic-aa.txt -
+# RUN: extract bb - < %s | diff %S/Inputs/basic-bb.txt -
+# RUN: extract cc %s -o %t
+# RUN: FileCheck %s --check-prefix=CC < %t
+
+# RUN: not %extract aa 2>&1 | FileCheck %s --check-prefix=NO_INPUT
+
+# NO_INPUT: extract: error: input filename is not specified
+
+# RUN: not %extract dup %s 2>&1 | FileCheck %s --check-prefix=DUP
+
+# DUP: extract: error: {{.*}}.test: ';--- dup' occurs more than once
+
+# RUN: not %extract not_exist %s 2>&1 | FileCheck %s --check-prefix=NOT_EXIST
+
+# NOT_EXIST: extract: error: {{.*}}.test: ';--- not_exist' was not found

diff  --git a/llvm/test/tools/extract/help.test b/llvm/test/tools/extract/help.test
new file mode 100644
index 000000000000..282052869116
--- /dev/null
+++ b/llvm/test/tools/extract/help.test
@@ -0,0 +1,5 @@
+RUN: extract --help 2>&1 | FileCheck --implicit-check-not='General Options:' %s
+CHECK: OVERVIEW: Split input {{.*}}
+CHECK: Generic Options:
+CHECK: extract Options:
+CHECK:   -o

diff  --git a/llvm/test/tools/extract/no-leading-lines.test b/llvm/test/tools/extract/no-leading-lines.test
new file mode 100644
index 000000000000..f0efff5475af
--- /dev/null
+++ b/llvm/test/tools/extract/no-leading-lines.test
@@ -0,0 +1,10 @@
+## With --no-leading-lines, don't add leading lines (which is used to preserve line numbers).
+
+# RUN: extract --no-leading-lines input %s -o %t
+# RUN: count 1 < %t
+# RUN: FileCheck %s < %t
+
+# CHECK: input
+
+#--- input
+input

diff  --git a/llvm/test/tools/gold/X86/multiple-sections.ll b/llvm/test/tools/gold/X86/multiple-sections.ll
index facbd8d992ed..31a89a9d3b48 100644
--- a/llvm/test/tools/gold/X86/multiple-sections.ll
+++ b/llvm/test/tools/gold/X86/multiple-sections.ll
@@ -1,10 +1,8 @@
-; RUN: echo ".text.tin" > %t_order_lto.txt
-; RUN: echo ".text._start" >> %t_order_lto.txt
-; RUN: echo ".text.pat" >> %t_order_lto.txt
-; RUN: llvm-as %s -o %t.o
+; RUN: extract order %s -o %t.order
+; RUN: extract ir %s | llvm-as -o %t.o
 ; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \
 ; RUN:     -m elf_x86_64 -o %t.exe %t.o \
-; RUN:     --section-ordering-file=%t_order_lto.txt
+; RUN:     --section-ordering-file=%t.order
 ; RUN: llvm-readelf -s %t.exe | FileCheck %s
 
 ; Check that the order of the sections is tin -> _start -> pat.
@@ -13,6 +11,12 @@
 ; CHECK:      00000000004000b0     1 FUNC    LOCAL  DEFAULT    1 tin
 ; CHECK:      00000000004000c0    15 FUNC    GLOBAL DEFAULT    1 _start
 
+;--- order
+.text.tin
+.text._start
+.text.pat
+
+;--- ir
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 

diff  --git a/llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test b/llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test
index 78de46cc47b5..ad71e81eab83 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/strip-symbol.test
@@ -1,19 +1,24 @@
-# RUN: yaml2obj %s -o %t
+# RUN: extract yaml %s | yaml2obj - -o %t
 # RUN: llvm-objcopy --strip-symbol baz -N bar %t %t2
 # RUN: llvm-readobj --symbols --sections %t2 | FileCheck %s
 # RUN: llvm-strip --strip-symbol baz -N bar %t -o %t3
 # RUN: cmp %t2 %t3
 # RUN: llvm-strip --regex --strip-symbol '^b.*' -N bar %t -o %t4
 # RUN: cmp %t3 %t4
-# RUN: echo " bar # bar" > %t-list.txt
-# RUN: echo " baz # baz" >> %t-list.txt
-# RUN: echo " # no symbol" >> %t-list.txt
-# RUN: llvm-objcopy --strip-symbols %t-list.txt %t %t5
+# RUN: extract list1 %s -o %t-list.txt && llvm-objcopy --strip-symbols %t-list.txt %t %t5
 # RUN: cmp %t3 %t5
-# RUN: echo "b.* # bar & baz" > %t-list2.txt
-# RUN: llvm-objcopy --regex --strip-symbols %t-list2.txt %t %t6
+# RUN: extract list2 %s -o %t-list2.txt && llvm-objcopy --regex --strip-symbols %t-list2.txt %t %t6
 # RUN: cmp %t3 %t6
 
+#--- list1
+bar # bar
+baz # baz
+# no symbol
+
+#--- list2
+b.* # bar & baz
+
+#--- yaml
 !ELF
 FileHeader:
   Class:           ELFCLASS64

diff  --git a/llvm/test/tools/llvm-strings/radix.test b/llvm/test/tools/llvm-strings/radix.test
index d23fb3cddc8f..d9796a937d90 100644
--- a/llvm/test/tools/llvm-strings/radix.test
+++ b/llvm/test/tools/llvm-strings/radix.test
@@ -1,15 +1,18 @@
 ## Show that llvm-strings can handle the -t/--radix switch properly.
 
-RUN: echo one > %t
-RUN: echo two >> %t
-RUN: echo three >> %t
-RUN: echo four >> %t
-RUN: echo five >> %t
-RUN: echo six >> %t
-RUN: echo seven >> %t
-RUN: echo eight >> %t
-RUN: echo nine >> %t
-RUN: echo ten >> %t
+RUN: extract --no-leading-lines input %s -o %t
+#--- input
+one
+two
+three
+four
+five
+six
+seven
+eight
+nine
+ten
+#--- end
 
 RUN: llvm-strings %t | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}}
 RUN: llvm-strings -t d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}}

diff  --git a/llvm/tools/extract/.clang-tidy b/llvm/tools/extract/.clang-tidy
new file mode 100644
index 000000000000..87ec2ff53af6
--- /dev/null
+++ b/llvm/tools/extract/.clang-tidy
@@ -0,0 +1,19 @@
+# Almost identical to the top-level .clang-tidy, except that {Member,Parameter,Variable}Case use camelBack.
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming'
+CheckOptions:
+  - key:             readability-identifier-naming.ClassCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.EnumCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.FunctionCase
+    value:           camelBack
+  - key:             readability-identifier-naming.MemberCase
+    value:           camelBack
+  - key:             readability-identifier-naming.ParameterCase
+    value:           camelBack
+  - key:             readability-identifier-naming.UnionCase
+    value:           CamelCase
+  - key:             readability-identifier-naming.VariableCase
+    value:           camelBack
+  - key:             readability-identifier-naming.IgnoreMainLikeFunctions
+    value:           1

diff  --git a/llvm/tools/extract/CMakeLists.txt b/llvm/tools/extract/CMakeLists.txt
new file mode 100644
index 000000000000..dae1f463f066
--- /dev/null
+++ b/llvm/tools/extract/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_llvm_tool(extract
+  extract.cpp
+  )

diff  --git a/llvm/tools/extract/extract.cpp b/llvm/tools/extract/extract.cpp
new file mode 100644
index 000000000000..8ccb53915614
--- /dev/null
+++ b/llvm/tools/extract/extract.cpp
@@ -0,0 +1,113 @@
+//===- extract.cpp - Input splitting utility ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
+// the specified part.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/WithColor.h"
+#include <string>
+
+using namespace llvm;
+
+static cl::OptionCategory cat("extract Options"); // Category for this tool's own options in --help.
+// First positional argument: name of the part to extract.
+static cl::opt<std::string> part(cl::Positional, cl::desc("part"),
+                                 cl::cat(cat));
+// Second positional argument: input filename.
+static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
+                                  cl::cat(cat));
+// -o: output filename; the initial value is "-".
+static cl::opt<std::string> output("o", cl::desc("Output filename"),
+                                   cl::value_desc("filename"), cl::init("-"),
+                                   cl::cat(cat));
+// --no-leading-lines: do not pad the output with leading newlines.
+static cl::opt<bool> noLeadingLines("no-leading-lines",
+                                    cl::desc("Don't preserve line numbers"),
+                                    cl::cat(cat));
+// Tool name used as the prefix of error messages; set in main() from argv[0].
+static StringRef toolName;
+
+LLVM_ATTRIBUTE_NORETURN static void error(StringRef filename,
+                                          const Twine &message) { // Print an error prefixed with toolName to errs(), then exit(1).
+  if (filename.empty()) // No file context: print the message alone.
+    WithColor::error(errs(), toolName) << message << '\n';
+  else // Otherwise prefix the message with "<filename>: ".
+    WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
+  exit(1);
+}
+
+static void handle(MemoryBuffer &inputBuf, StringRef input) { // Find the part named by `part` in inputBuf and write it to `output`; `input` is only used in diagnostics.
+  const char *partBegin = nullptr, *partEnd = nullptr; // [partBegin, partEnd) is the byte range of the requested part.
+  int numEmptyLines = 0; // Number of '\n' bytes written before the part.
+  StringRef separator; // Marker prefix of the last separator line seen; used in diagnostics.
+  for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
+    StringRef line = *i++; // From here on, `i` refers to the line after `line`.
+    size_t markerLen = line.startswith("//") ? 6 : 5; // Length of "//--- " or of "<one char>--- ".
+    if (!(line.size() > markerLen &&
+          line.substr(markerLen - 4).startswith("--- ")))
+      continue; // Not a separator line with a non-empty part name.
+    separator = line.substr(0, markerLen);
+    StringRef cur = line.substr(markerLen); // Part name: the rest of the line after the marker.
+    if (cur == part) {
+      if (partBegin)
+        error(input, "'" + separator + cur + "' occurs more than once");
+      if (!noLeadingLines)
+        numEmptyLines = i.line_number() - 1; // Line number of the separator itself.
+      if (i.is_at_eof())
+        break; // NOTE(review): a matching separator on the last line leaves partBegin null, so the "was not found" error below fires.
+      partBegin = i->data(); // The part starts on the line after the separator.
+    } else if (partBegin && !partEnd) {
+      partEnd = line.data(); // The first separator after the part starts ends it.
+    }
+  }
+  if (!partBegin)
+    error(input, "'" + separator + part + "' was not found");
+  if (!partEnd)
+    partEnd = inputBuf.getBufferEnd(); // No later separator: the part runs to the end of the input.
+  // The output holds the padding newlines followed by the bytes of the part.
+  Expected<std::unique_ptr<FileOutputBuffer>> outputBuf =
+      FileOutputBuffer::create(output, numEmptyLines + (partEnd - partBegin));
+  if (!outputBuf)
+    error(input, toString(outputBuf.takeError()));
+  uint8_t *buf = (*outputBuf)->getBufferStart();
+
+  // If --no-leading-lines is specified, numEmptyLines is 0. Otherwise prepend
+  // newlines so that the extracted part preserves line numbers.
+  std::fill_n(buf, numEmptyLines, '\n');
+  std::copy(partBegin, partEnd, buf + numEmptyLines);
+  if (Error e = (*outputBuf)->commit())
+    error(input, toString(std::move(e)));
+}
+
+int main(int argc, const char **argv) { // Parse the command line, load the input, and extract the requested part.
+  toolName = sys::path::stem(argv[0]); // Prefix for messages printed by error().
+  cl::HideUnrelatedOptions({&cat}); // Hide options outside this tool's category from --help.
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "Split input into multiple parts separated by regex '^(.|//)--- ' and "
+      "extract the part specified by '^(.|//)--- <part>'\n",
+      nullptr,
+      /*EnvVar=*/nullptr,
+      /*LongOptionsUseDoubleDash=*/true);
+  // The input filename is mandatory; report its absence without a file prefix.
+  if (input.empty())
+    error("", "input filename is not specified");
+  ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
+      MemoryBuffer::getFileOrSTDIN(input); // basic.test passes "-" here to read from stdin.
+  if (std::error_code ec = bufferOrErr.getError())
+    error(input, ec.message());
+  handle(**bufferOrErr, input);
+}