[llvm] [MLGO] Remove absl dependency from scripts (PR #78880)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 21 15:01:24 PST 2024
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/78880
>From 80c9507d7f49ddbc5f2554f597950f797355c255 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sun, 21 Jan 2024 03:53:03 +0000
Subject: [PATCH 1/7] Add make_corpus script test
---
.../tests/corpus/make_corpus_script.test | 22 +++++++++++++++++++
llvm/utils/mlgo-utils/tests/lit.cfg | 7 +++++-
2 files changed, 28 insertions(+), 1 deletion(-)
create mode 100644 llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
diff --git a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
new file mode 100644
index 00000000000000..f4f97544bce47d
--- /dev/null
+++ b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
@@ -0,0 +1,22 @@
+## Testing that the make_corpus script works as expected when invoked.
+
+# RUN: rm -rf %t.dir && mkdir %t.dir
+# RUN: touch %t.dir/test1.bc
+# RUN: touch %t.dir/test2.bc
+# RUN: rm -rf %t.out.dir && mkdir %t.out.dir
+
+# RUN: %python %scripts_dir/corpus/make_corpus.py --input_dir=%t.dir --output_dir=%t.out.dir --default_args="-test"
+
+# RUN: cat %t.out.dir/corpus_description.json | FileCheck %s
+
+## Check that we get the expected command in the global command override
+# CHECK: "-test"
+# CHECK: "has_thinlto": false
+## Check that the modules are in the corpus description
+# CHECK: "test1"
+# CHECK: "test2"
+
+# RUN: ls %t.out.dir | FileCheck %s --check-prefix CHECK-DIR
+
+# CHECK-DIR: test1.bc
+# CHECK-DIR: test2.bc
diff --git a/llvm/utils/mlgo-utils/tests/lit.cfg b/llvm/utils/mlgo-utils/tests/lit.cfg
index 055f0945942fc1..9afced53f195c5 100644
--- a/llvm/utils/mlgo-utils/tests/lit.cfg
+++ b/llvm/utils/mlgo-utils/tests/lit.cfg
@@ -1,3 +1,5 @@
+import os
+
import lit.formats
from lit.llvm import llvm_config
@@ -5,7 +7,7 @@ from lit.llvm import llvm_config
config.name = "mlgo-utils"
config.test_format = lit.formats.ShTest(execute_external=False)
-config.suffixes = [".py"]
+config.suffixes = [".py", ".test"]
config.test_source_root = os.path.dirname(__file__)
config.test_exec_root = config.obj_root
@@ -13,3 +15,6 @@ config.test_exec_root = config.obj_root
config.environment["PYTHONPATH"] = os.path.join(config.src_root, "utils", "mlgo-utils")
llvm_config.use_default_substitutions()
+
+scripts_dir = os.path.join(config.src_root, "utils/mlgo-utils/mlgo")
+config.substitutions.append(("%scripts_dir", scripts_dir))
>From d99f5d4cd2c7c6d9e70125e893dc2ae40c897d36 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sun, 21 Jan 2024 04:58:42 +0000
Subject: [PATCH 2/7] Add another test
---
llvm/utils/mlgo-utils/CMakeLists.txt | 2 +-
.../combine_training_corpus_script.test | 29 +++++++++++++++++++
llvm/utils/mlgo-utils/tests/lit.cfg | 1 +
3 files changed, 31 insertions(+), 1 deletion(-)
create mode 100644 llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
diff --git a/llvm/utils/mlgo-utils/CMakeLists.txt b/llvm/utils/mlgo-utils/CMakeLists.txt
index 7b303c7639401a..c263c92c632797 100644
--- a/llvm/utils/mlgo-utils/CMakeLists.txt
+++ b/llvm/utils/mlgo-utils/CMakeLists.txt
@@ -5,7 +5,7 @@ configure_lit_site_cfg(
add_lit_testsuite(check-mlgo-utils "Running mlgo-utils tests"
${CMAKE_CURRENT_BINARY_DIR}
- DEPENDS "FileCheck" "not" "count"
+ DEPENDS "FileCheck" "not" "count" "split-file"
)
set_target_properties(check-mlgo-utils PROPERTIES FOLDER "Tests")
diff --git a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
new file mode 100644
index 00000000000000..1aa182146a49ee
--- /dev/null
+++ b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
@@ -0,0 +1,29 @@
+## Testing that the combine_trainig_corpus script works as expected when
+## invoked.
+
+# RUN: rm -rf %t.dir && mkdir %t.dir
+# RUN: split-file %s %t.dir
+# RUN: %python %scripts_dir/corpus/combine_training_corpus.py --root_dir=%t.dir
+# RUN: cat %t.dir/corpus_description.json | FileCheck %s
+
+## Check that we end up with the same properties as the original corpora
+# CHECK: "has_thinlto": false
+
+## Check that the modules end up in the combined corpus. Order does not matter.
+# CHECK-DAG: "subcorpus1/test1.o"
+# CHECK-DAG: "subcorpus2/test2.o"
+
+#--- subcorpus1/corpus_description.json
+{
+ "has_thinlto": false,
+ "modules": [
+ "test1.o"
+ ]
+}
+#--- subcorpus2/corpus_description.json
+{
+ "has_thinlto": false,
+ "modules": [
+ "test2.o"
+ ]
+}
diff --git a/llvm/utils/mlgo-utils/tests/lit.cfg b/llvm/utils/mlgo-utils/tests/lit.cfg
index 9afced53f195c5..58c35e69c652c5 100644
--- a/llvm/utils/mlgo-utils/tests/lit.cfg
+++ b/llvm/utils/mlgo-utils/tests/lit.cfg
@@ -15,6 +15,7 @@ config.test_exec_root = config.obj_root
config.environment["PYTHONPATH"] = os.path.join(config.src_root, "utils", "mlgo-utils")
llvm_config.use_default_substitutions()
+config.substitutions.append(("split-file", llvm_config.use_llvm_tool("split-file")))
scripts_dir = os.path.join(config.src_root, "utils/mlgo-utils/mlgo")
config.substitutions.append(("%scripts_dir", scripts_dir))
>From 0f2d0cd83efb07fdaee048b49f2562f4372c944d Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sun, 21 Jan 2024 05:16:05 +0000
Subject: [PATCH 3/7] Add extract_ir test
---
llvm/utils/mlgo-utils/CMakeLists.txt | 2 +-
.../tests/corpus/extract_ir_script.test | 44 +++++++++++++++++++
llvm/utils/mlgo-utils/tests/lit.cfg | 2 +
3 files changed, 47 insertions(+), 1 deletion(-)
create mode 100644 llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
diff --git a/llvm/utils/mlgo-utils/CMakeLists.txt b/llvm/utils/mlgo-utils/CMakeLists.txt
index c263c92c632797..3129331d58c75b 100644
--- a/llvm/utils/mlgo-utils/CMakeLists.txt
+++ b/llvm/utils/mlgo-utils/CMakeLists.txt
@@ -5,7 +5,7 @@ configure_lit_site_cfg(
add_lit_testsuite(check-mlgo-utils "Running mlgo-utils tests"
${CMAKE_CURRENT_BINARY_DIR}
- DEPENDS "FileCheck" "not" "count" "split-file"
+ DEPENDS "FileCheck" "not" "count" "split-file" "yaml2obj" "llvm-objcopy"
)
set_target_properties(check-mlgo-utils PROPERTIES FOLDER "Tests")
diff --git a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
new file mode 100644
index 00000000000000..a7629eb629219d
--- /dev/null
+++ b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
@@ -0,0 +1,44 @@
+## Test that invoking the extract_ir script work as expected.
+
+# RUN: rm -rf %t.dir && mkdir %t.dir
+# RUN: yaml2obj %s -o %t.dir/test1.o
+# RUN: yaml2obj %s -o %t.dir/test2.o
+# RUN: rm -rf %t.dir.out && mkdir %t.dir.out
+
+# RUN: %python %scripts_dir/corpus/extract_ir.py --input=%t.dir --input_type=directory --output_dir=%t.dir.out --llvm_objcopy_path=llvm-objcopy
+# RUN: cat %t.dir.out/corpus_description.json | FileCheck %s
+
+## Check that this is not a thinLTO build
+# CHECK: "has_thinlto": false
+## Check that the expected modules end up in the corpus description
+# CHECK-DAG: "test1.o"
+# CHECK-DAG: "test2.o"
+
+# RUN: ls %t.dir.out | FileCheck %s --check-prefix CHECK-DIR
+
+# CHECK-DIR: test1.o.bc
+# CHECK-DIR: test1.o.cmd
+# CHECK-DIR: test2.o.bc
+# CHECK-DIR: test2.o.cmd
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_REL
+ Machine: EM_X86_64
+ SectionHeaderStringTable: .strtab
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x10
+ Content: 55
+ - Name: .llvmbc
+ Type: SHT_PROGBITS
+ AddressAlign: 0x1
+ Content: 55
+ - Name: .llvmcmd
+ Type: SHT_PROGBITS
+ AddressAlign: 0x1
+ Content: ff
diff --git a/llvm/utils/mlgo-utils/tests/lit.cfg b/llvm/utils/mlgo-utils/tests/lit.cfg
index 58c35e69c652c5..0f6137e5e91383 100644
--- a/llvm/utils/mlgo-utils/tests/lit.cfg
+++ b/llvm/utils/mlgo-utils/tests/lit.cfg
@@ -16,6 +16,8 @@ config.environment["PYTHONPATH"] = os.path.join(config.src_root, "utils", "mlgo-
llvm_config.use_default_substitutions()
config.substitutions.append(("split-file", llvm_config.use_llvm_tool("split-file")))
+config.substitutions.append(("yaml2obj", llvm_config.use_llvm_tool("yaml2obj")))
+config.substitutions.append(("llvm-objcopy", llvm_config.use_llvm_tool("llvm-objcopy")))
scripts_dir = os.path.join(config.src_root, "utils/mlgo-utils/mlgo")
config.substitutions.append(("%scripts_dir", scripts_dir))
>From 01dd2821526a435524fbe2d4cad0fff4b880a8fd Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sun, 21 Jan 2024 06:08:28 +0000
Subject: [PATCH 4/7] [MLGO] Remove absl dependency from scripts
This patch removes the absl dependency from the mlgo-utils scripts. We
were only using absl.logging and absl.flags, so this patch just
consists of mechanically converting the absl flags parsing to Python's
builtin argparse, as Python's logging is a drop-in replacement for
absl.logging.
---
.../mlgo/corpus/combine_training_corpus.py | 27 +--
.../mlgo-utils/mlgo/corpus/extract_ir.py | 204 +++++++++---------
.../mlgo-utils/mlgo/corpus/make_corpus.py | 45 ++--
3 files changed, 137 insertions(+), 139 deletions(-)
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py b/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
index 9aabd87b4688e0..cc21061cbbef5e 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
@@ -23,26 +23,21 @@
and corpus2 are combined into combinedcorpus.
"""
-from absl import app
-from absl import flags
+import argparse
from mlgo.corpus import combine_training_corpus_lib
-flags.DEFINE_string("root_dir", "", "root dir of module paths to combine.")
-FLAGS = flags.FLAGS
-
-
-def main(argv):
- if len(argv) > 1:
- raise app.UsageError("Too many command-line arguments.")
-
- combine_training_corpus_lib.combine_corpus(FLAGS.root_dir)
-
-
-def entrypoint():
- app.run(main)
+def main(args):
+ combine_training_corpus_lib.combine_corpus(args.root_dir)
if __name__ == "__main__":
- entrypoint()
+ parser = argparse.ArgumentParser(
+ description="A tool for combining multiple training corpora"
+ )
+ parser.add_argument(
+ "--root_dir", type=str, help="The root dir of module paths to combine."
+ )
+ args = parser.parse_args()
+ main(args)
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
index 9463e61dc534fe..4426463e22b0e7 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
@@ -26,127 +26,59 @@
import json
import multiprocessing
-
-from absl import app
-from absl import flags
-from absl import logging
+import logging
+import argparse
from mlgo.corpus import extract_ir_lib
-flags.DEFINE_string(
- "input",
- None,
- "Input file or directory - either compile_commands.json, a linker parameter"
- "list, or a path to a directory containing object files.",
-)
-flags.DEFINE_enum(
- "input_type",
- "json",
- ["json", "params", "directory"],
- "Input file type - json, params, or directory. params latter refers to lld"
- "params.",
-)
-flags.DEFINE_string("output_dir", None, "Output directory")
-flags.DEFINE_integer(
- "num_workers",
- None,
- "Number of parallel workers for objcopy. `None` for maximum available.",
-)
-flags.DEFINE_string("llvm_objcopy_path", "llvm-objcopy", "Path to llvm-objcopy")
-flags.DEFINE_string(
- "obj_base_dir",
- "",
- "Base directory for object files. Defaults to current working dir.",
-)
-flags.DEFINE_string(
- "cmd_filter",
- None,
- "Include only those modules with a command line matching this regexp. "
- "Setting it to None for not filtering. Note that the regexp is applied "
- "independently for each separate command line option. For example, ^-Oz$ "
- "will match Oz - built binaries. Does not work with thinlto_build=lld.",
-)
-flags.DEFINE_enum(
- "thinlto_build",
- None,
- ["distributed", "local"],
- "Set if the build was performed with either 'distributed' or "
- "'local' ThinLTO. This ensures the thinlto.bc files are also copied. "
- "The build is assumed to have had "
- "-mllvm -lto-embed-bitcode=post-merge-pre-opt passed in the distributed "
- "case, or -Wl,--save-temps=import and -Wl,--thinlto-emit-index-files "
- "passed in the local case.",
-)
-flags.DEFINE_string(
- "cmd_section_name",
- ".llvmcmd",
- "The section name passed to llvm-objcopy. For ELF object files, the "
- "default .llvmcmd is correct. For Mach-O object files, one should use "
- "something like __LLVM,__cmdline",
-)
-flags.DEFINE_string(
- "bitcode_section_name",
- ".llvmbc",
- "The section name passed to llvm-objcopy. For ELF object files, the "
- "default .llvmbc is correct. For Mach-O object files, one should use "
- "__LLVM,__bitcode",
-)
-
-flags.mark_flag_as_required("output_dir")
-
-FLAGS = flags.FLAGS
-
-
-def main(argv):
- if len(argv) > 1:
- raise app.UsageError("Too many command-line arguments.")
+def main(args):
objs = []
- if FLAGS.input is not None and FLAGS.thinlto_build == "local":
+ if args.input is not None and args.thinlto_build == "local":
raise ValueError("--thinlto_build=local cannot be run with --input")
- if FLAGS.input is None:
- if FLAGS.thinlto_build != "local":
+ if args.input is None:
+ if args.thinlto_build != "local":
raise ValueError("--input or --thinlto_build=local must be provided")
- objs = extract_ir_lib.load_for_lld_thinlto(FLAGS.obj_base_dir, FLAGS.output_dir)
- elif FLAGS.input_type == "json":
- with open(FLAGS.input, encoding="utf-8") as f:
+ objs = extract_ir_lib.load_for_lld_thinlto(args.obj_base_dir, args.output_dir)
+ elif args.input_type == "json":
+ with open(args.input, encoding="utf-8") as f:
objs = extract_ir_lib.load_from_compile_commands(
- json.load(f), FLAGS.output_dir
+ json.load(f), args.output_dir
)
- elif FLAGS.input_type == "params":
- if not FLAGS.obj_base_dir:
+ elif args.input_type == "params":
+ if not args.obj_base_dir:
logging.info(
"-obj_base_dir is unspecified, assuming current directory."
"If no objects are found, use this option to specify the root"
"directory for the object file paths in the input file."
)
- with open(FLAGS.input, encoding="utf-8") as f:
+ with open(args.input, encoding="utf-8") as f:
objs = extract_ir_lib.load_from_lld_params(
- [l.strip() for l in f.readlines()], FLAGS.obj_base_dir, FLAGS.output_dir
+ [l.strip() for l in f.readlines()], args.obj_base_dir, args.output_dir
)
- elif FLAGS.input_type == "directory":
+ elif args.input_type == "directory":
logging.warning(
"Using the directory input is only recommended if the build system"
"your project uses does not support any structured output that"
"ml-compiler-opt understands. If your build system provides a"
"structured compilation database, use that instead"
)
- objs = extract_ir_lib.load_from_directory(FLAGS.input, FLAGS.output_dir)
+ objs = extract_ir_lib.load_from_directory(args.input, args.output_dir)
else:
- logging.error("Unknown input type: %s", FLAGS.input_type)
+ logging.error("Unknown input type: %s", args.input_type)
relative_output_paths = extract_ir_lib.run_extraction(
objs,
- FLAGS.num_workers,
- FLAGS.llvm_objcopy_path,
- FLAGS.cmd_filter,
- FLAGS.thinlto_build,
- FLAGS.cmd_section_name,
- FLAGS.bitcode_section_name,
+ args.num_workers,
+ args.llvm_objcopy_path,
+ args.cmd_filter,
+ args.thinlto_build,
+ args.cmd_section_name,
+ args.bitcode_section_name,
)
extract_ir_lib.write_corpus_manifest(
- FLAGS.thinlto_build, relative_output_paths, FLAGS.output_dir
+ args.thinlto_build, relative_output_paths, args.output_dir
)
logging.info(
@@ -156,10 +88,86 @@ def main(argv):
)
-def entrypoint():
- multiprocessing.set_start_method("fork")
- app.run(main)
-
-
if __name__ == "__main__":
- entrypoint()
+ parser = argparse.ArgumentParser(
+ description="A tool for making a corpus from build artifacts"
+ )
+ parser.add_argument(
+ "--input",
+ type=str,
+ help="Input file or directory - either compile_commands.json, a linker "
+ "parameter list, or a path to a directory containing object files.",
+ )
+ parser.add_argument(
+ "--input_type",
+ type=str,
+ help="Input file type - JSON, LLD params, or directory.",
+ choices=["json", "params", "directory"],
+ default="json",
+ nargs="?",
+ )
+ parser.add_argument("--output_dir", type=str, help="Output directory")
+ parser.add_argument(
+ "--num_workers",
+ type=int,
+ help="Number of parallel works for objcopy. `None` for maximum available.",
+ default=None,
+ nargs="?",
+ )
+ parser.add_argument(
+ "--llvm_objcopy_path",
+ type=str,
+ help="Path to llvm-objcopy",
+ default="llvm-objcopy",
+ nargs="?",
+ )
+ parser.add_argument(
+ "--obj_base_dir",
+ type=str,
+ help="Base directory for object files. Defaults to current working dir.",
+ default="",
+ nargs="?",
+ )
+ parser.add_argument(
+ "--cmd_filter",
+ type=str,
+ help="Include only those modules with a command line matching this regular "
+ "expression. Set it to None to not perform any filtering. Note that the "
+ "regular expression is applied independently for each separate command line "
+ "option. For example, ^-Oz$ will match Oz built binaries. This does not work "
+ "with thinlto_build=lld.",
+ default=None,
+ nargs="?",
+ )
+ parser.add_argument(
+ "--thinlto_build",
+ type=str,
+ help="Set if the build was performed with either 'distributed' or 'local' "
+ "ThinLTO. This ensures the thinlto.bc files are also copied. The build is "
+ "assumed to have had -mllvm -lto-embed-bitcode=post-merge-pre-opt passed in "
+ "the distributed case or -Wl,--save-temps=import and "
+ "-Wl,--thinlto-emit-index-files passed in the local case",
+ choices=["distributed", "local"],
+ default=None,
+ nargs="?",
+ )
+ parser.add_argument(
+ "--cmd_section_name",
+ type=str,
+ help="The section name passed to llvm-objcopy. For ELF object files, the "
+ "default .llvmcmd is correct. For Mach-O object files, one should use "
+ "something like __LLVM,__cmdline",
+ default=".llvmcmd",
+ nargs="?",
+ )
+ parser.add_argument(
+ "--bitcode_section_name",
+ type=str,
+ help="The section name passed to llvm-objcopy. For ELF object files, the "
+ "default .llvmbc is correct. For Mach-O object files, one should use "
+ "__LLVM,__bitcode",
+ default=".llvmbc",
+ nargs="?",
+ )
+ args = parser.parse_args()
+ main(args)
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
index edb0ecd853de24..05ceb750de673e 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
@@ -12,43 +12,38 @@
--default_args="<list of space separated flags>"
"""
-from absl import app
-from absl import flags
-from absl import logging
+import logging
+import argparse
from mlgo.corpus import make_corpus_lib
-flags.DEFINE_string("input_dir", None, "The input directory.")
-flags.DEFINE_string("output_dir", None, "The output directory.")
-flags.DEFINE_string(
- "default_args",
- "",
- "The compiler flags to compile with when using downstream tooling.",
-)
-flags.mark_flag_as_required("input_dir")
-flags.mark_flag_as_required("output_dir")
-
-FLAGS = flags.FLAGS
-
-
-def main(_):
+def main(args):
logging.warning(
"Using this tool does not guarantee that the bitcode is taken at "
"the correct stage for consumption during model training. Make "
"sure to validate assumptions about where the bitcode is coming "
"from before using it in production."
)
- relative_paths = make_corpus_lib.load_bitcode_from_directory(FLAGS.input_dir)
- make_corpus_lib.copy_bitcode(relative_paths, FLAGS.input_dir, FLAGS.output_dir)
+ relative_paths = make_corpus_lib.load_bitcode_from_directory(args.input_dir)
+ make_corpus_lib.copy_bitcode(relative_paths, args.input_dir, args.output_dir)
make_corpus_lib.write_corpus_manifest(
- relative_paths, FLAGS.output_dir, FLAGS.default_args.split()
+ relative_paths, args.output_dir, args.default_args.split()
)
-def entrypoint():
- app.run(main)
-
-
if __name__ == "__main__":
- entrypoint()
+ parser = argparse.ArgumentParser(
+ description="A tool for making a corpus from arbitrary bitcode"
+ )
+ parser.add_argument("--input_dir", type=str, help="The input directory.")
+ parser.add_argument("--output_dir", type=str, help="The output directory.")
+ parser.add_argument(
+ "--default_args",
+ type=str,
+ help="The compiler flags to compile with when using downstream tooling.",
+ default="",
+ nargs="?",
+ )
+ args = parser.parse_args()
+ main(args)
>From 69e230ad1bae8060c9055157eced593b15b436ee Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sun, 21 Jan 2024 06:12:29 +0000
Subject: [PATCH 5/7] Add requires lines to tests
---
.../mlgo-utils/tests/corpus/combine_training_corpus_script.test | 2 ++
llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test | 2 ++
llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test | 2 ++
3 files changed, 6 insertions(+)
diff --git a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
index 1aa182146a49ee..933a9c2b9f811e 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
@@ -1,3 +1,5 @@
+# REQUIRES: python-38, absl, system-linux
+
## Testing that the combine_trainig_corpus script works as expected when
## invoked.
diff --git a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
index a7629eb629219d..c20581dacdc651 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
@@ -1,3 +1,5 @@
+# REQUIRES: python-38, absl, system-linux
+
## Test that invoking the extract_ir script work as expected.
# RUN: rm -rf %t.dir && mkdir %t.dir
diff --git a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
index f4f97544bce47d..3c1b96523718e4 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
@@ -1,3 +1,5 @@
+# REQUIRES: python-38, absl, system-linux
+
## Testing that the make_corpus script works as expected when invoked.
# RUN: rm -rf %t.dir && mkdir %t.dir
>From b919c42c768a3343056998a0643c3a02896b1202 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sun, 21 Jan 2024 06:14:14 +0000
Subject: [PATCH 6/7] Remove other references to absl dep
---
llvm/utils/mlgo-utils/pyproject.toml | 3 ---
.../tests/corpus/combine_training_corpus_script.test | 2 +-
llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test | 2 +-
.../utils/mlgo-utils/tests/corpus/make_corpus_script.test | 2 +-
llvm/utils/mlgo-utils/tests/lit.local.cfg | 8 --------
5 files changed, 3 insertions(+), 14 deletions(-)
diff --git a/llvm/utils/mlgo-utils/pyproject.toml b/llvm/utils/mlgo-utils/pyproject.toml
index be2af86cd05df3..dac18a785c17b9 100644
--- a/llvm/utils/mlgo-utils/pyproject.toml
+++ b/llvm/utils/mlgo-utils/pyproject.toml
@@ -7,9 +7,6 @@ name = "mlgo"
description = "Tooling for ML in LLVM"
readme = "README.md"
requires-python = ">=3.8,<3.11"
-dependencies = [
- "absl-py>=1.0.0"
-]
dynamic = ["version"]
license = {text = "Apache-2.0 WITH LLVM-exception"}
classifiers = [
diff --git a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
index 933a9c2b9f811e..51dc637347caf0 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
@@ -1,4 +1,4 @@
-# REQUIRES: python-38, absl, system-linux
+# REQUIRES: python-38, system-linux
## Testing that the combine_trainig_corpus script works as expected when
## invoked.
diff --git a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
index c20581dacdc651..107116618ce97b 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
@@ -1,4 +1,4 @@
-# REQUIRES: python-38, absl, system-linux
+# REQUIRES: python-38, system-linux
## Test that invoking the extract_ir script work as expected.
diff --git a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
index 3c1b96523718e4..a08780055f31f1 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
@@ -1,4 +1,4 @@
-# REQUIRES: python-38, absl, system-linux
+# REQUIRES: python-38, system-linux
## Testing that the make_corpus script works as expected when invoked.
diff --git a/llvm/utils/mlgo-utils/tests/lit.local.cfg b/llvm/utils/mlgo-utils/tests/lit.local.cfg
index 90cdf8ba618ed8..a9088750cb58b1 100644
--- a/llvm/utils/mlgo-utils/tests/lit.local.cfg
+++ b/llvm/utils/mlgo-utils/tests/lit.local.cfg
@@ -4,11 +4,3 @@ import sys
# the entire project has been bumped to 3.8.
if sys.version_info > (3,8):
config.available_features.add("python-38")
-
-# TODO(boomanaiden154): Remove this flag once the scripts are converted to
-# not use absl anymore.
-try:
- import absl
- config.available_features.add("absl")
-except:
- pass
>From 336ccb0d643ed627343d2e38d5dbdd659e8688bc Mon Sep 17 00:00:00 2001
From: Aiden Grossman <agrossman154 at yahoo.com>
Date: Sun, 21 Jan 2024 23:01:09 +0000
Subject: [PATCH 7/7] Move argument parsing and sort imports
---
.../mlgo/corpus/combine_training_corpus.py | 14 +-
.../mlgo-utils/mlgo/corpus/extract_ir.py | 122 +++++++++---------
.../mlgo-utils/mlgo/corpus/make_corpus.py | 34 ++---
3 files changed, 91 insertions(+), 79 deletions(-)
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py b/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
index cc21061cbbef5e..3b2077b4c0e0e6 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
@@ -28,11 +28,7 @@
from mlgo.corpus import combine_training_corpus_lib
-def main(args):
- combine_training_corpus_lib.combine_corpus(args.root_dir)
-
-
-if __name__ == "__main__":
+def parse_args_and_run():
parser = argparse.ArgumentParser(
description="A tool for combining multiple training corpora"
)
@@ -41,3 +37,11 @@ def main(args):
)
args = parser.parse_args()
main(args)
+
+
+def main(args):
+ combine_training_corpus_lib.combine_corpus(args.root_dir)
+
+
+if __name__ == "__main__":
+ parse_args_and_run()
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
index 4426463e22b0e7..94415431ab4a38 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
@@ -24,71 +24,15 @@
any output.
"""
+import argparse
import json
-import multiprocessing
import logging
-import argparse
+import multiprocessing
from mlgo.corpus import extract_ir_lib
-def main(args):
- objs = []
- if args.input is not None and args.thinlto_build == "local":
- raise ValueError("--thinlto_build=local cannot be run with --input")
- if args.input is None:
- if args.thinlto_build != "local":
- raise ValueError("--input or --thinlto_build=local must be provided")
- objs = extract_ir_lib.load_for_lld_thinlto(args.obj_base_dir, args.output_dir)
- elif args.input_type == "json":
- with open(args.input, encoding="utf-8") as f:
- objs = extract_ir_lib.load_from_compile_commands(
- json.load(f), args.output_dir
- )
- elif args.input_type == "params":
- if not args.obj_base_dir:
- logging.info(
- "-obj_base_dir is unspecified, assuming current directory."
- "If no objects are found, use this option to specify the root"
- "directory for the object file paths in the input file."
- )
- with open(args.input, encoding="utf-8") as f:
- objs = extract_ir_lib.load_from_lld_params(
- [l.strip() for l in f.readlines()], args.obj_base_dir, args.output_dir
- )
- elif args.input_type == "directory":
- logging.warning(
- "Using the directory input is only recommended if the build system"
- "your project uses does not support any structured output that"
- "ml-compiler-opt understands. If your build system provides a"
- "structured compilation database, use that instead"
- )
- objs = extract_ir_lib.load_from_directory(args.input, args.output_dir)
- else:
- logging.error("Unknown input type: %s", args.input_type)
-
- relative_output_paths = extract_ir_lib.run_extraction(
- objs,
- args.num_workers,
- args.llvm_objcopy_path,
- args.cmd_filter,
- args.thinlto_build,
- args.cmd_section_name,
- args.bitcode_section_name,
- )
-
- extract_ir_lib.write_corpus_manifest(
- args.thinlto_build, relative_output_paths, args.output_dir
- )
-
- logging.info(
- "Converted %d files out of %d",
- len(objs) - relative_output_paths.count(None),
- len(objs),
- )
-
-
-if __name__ == "__main__":
+def parse_args_and_run():
parser = argparse.ArgumentParser(
description="A tool for making a corpus from build artifacts"
)
@@ -171,3 +115,63 @@ def main(args):
)
args = parser.parse_args()
main(args)
+
+
+def main(args):
+ objs = []
+ if args.input is not None and args.thinlto_build == "local":
+ raise ValueError("--thinlto_build=local cannot be run with --input")
+ if args.input is None:
+ if args.thinlto_build != "local":
+ raise ValueError("--input or --thinlto_build=local must be provided")
+ objs = extract_ir_lib.load_for_lld_thinlto(args.obj_base_dir, args.output_dir)
+ elif args.input_type == "json":
+ with open(args.input, encoding="utf-8") as f:
+ objs = extract_ir_lib.load_from_compile_commands(
+ json.load(f), args.output_dir
+ )
+ elif args.input_type == "params":
+ if not args.obj_base_dir:
+ logging.info(
+ "-obj_base_dir is unspecified, assuming current directory."
+ "If no objects are found, use this option to specify the root"
+ "directory for the object file paths in the input file."
+ )
+ with open(args.input, encoding="utf-8") as f:
+ objs = extract_ir_lib.load_from_lld_params(
+ [l.strip() for l in f.readlines()], args.obj_base_dir, args.output_dir
+ )
+ elif args.input_type == "directory":
+ logging.warning(
+ "Using the directory input is only recommended if the build system"
+ "your project uses does not support any structured output that"
+ "ml-compiler-opt understands. If your build system provides a"
+ "structured compilation database, use that instead"
+ )
+ objs = extract_ir_lib.load_from_directory(args.input, args.output_dir)
+ else:
+ logging.error("Unknown input type: %s", args.input_type)
+
+ relative_output_paths = extract_ir_lib.run_extraction(
+ objs,
+ args.num_workers,
+ args.llvm_objcopy_path,
+ args.cmd_filter,
+ args.thinlto_build,
+ args.cmd_section_name,
+ args.bitcode_section_name,
+ )
+
+ extract_ir_lib.write_corpus_manifest(
+ args.thinlto_build, relative_output_paths, args.output_dir
+ )
+
+ logging.info(
+ "Converted %d files out of %d",
+ len(objs) - relative_output_paths.count(None),
+ len(objs),
+ )
+
+
+if __name__ == "__main__":
+ parse_args_and_run()
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
index 05ceb750de673e..221486e16c6e00 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
@@ -12,12 +12,29 @@
--default_args="<list of space separated flags>"
"""
-import logging
import argparse
+import logging
from mlgo.corpus import make_corpus_lib
+def parse_args_and_run():
+ parser = argparse.ArgumentParser(
+ description="A tool for making a corpus from arbitrary bitcode"
+ )
+ parser.add_argument("--input_dir", type=str, help="The input directory.")
+ parser.add_argument("--output_dir", type=str, help="The output directory.")
+ parser.add_argument(
+ "--default_args",
+ type=str,
+ help="The compiler flags to compile with when using downstream tooling.",
+ default="",
+ nargs="?",
+ )
+ args = parser.parse_args()
+ main(args)
+
+
def main(args):
logging.warning(
"Using this tool does not guarantee that the bitcode is taken at "
@@ -33,17 +50,4 @@ def main(args):
if __name__ == "__main__":
- parser = argparse.ArgumentParser(
- description="A tool for making a corpus from arbitrary bitcode"
- )
- parser.add_argument("--input_dir", type=str, help="The input directory.")
- parser.add_argument("--output_dir", type=str, help="The output directory.")
- parser.add_argument(
- "--default_args",
- type=str,
- help="The compiler flags to compile with when using downstream tooling.",
- default="",
- nargs="?",
- )
- args = parser.parse_args()
- main(args)
+ parse_args_and_run()
More information about the llvm-commits
mailing list