[llvm] [utils][filecheck-lint]: speedup filecheck_lint by caching edit distance (PR #94191)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 08:00:47 PDT 2024
https://github.com/klensy updated https://github.com/llvm/llvm-project/pull/94191
>From faeba1e67bfb7822fd30f0cab6e129d28c985485 Mon Sep 17 00:00:00 2001
From: klensy <nightouser at gmail.com>
Date: Mon, 3 Jun 2024 11:08:52 +0300
Subject: [PATCH 1/2] [utils][filecheck-lint]: speedup filecheck_lint by
caching edit_distance calculations
---
llvm/utils/filecheck_lint/filecheck_lint.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/llvm/utils/filecheck_lint/filecheck_lint.py b/llvm/utils/filecheck_lint/filecheck_lint.py
index 837846db83321..d238277c9e01b 100755
--- a/llvm/utils/filecheck_lint/filecheck_lint.py
+++ b/llvm/utils/filecheck_lint/filecheck_lint.py
@@ -228,7 +228,8 @@ def find_best_match(typo):
)
potential_directives = find_potential_directives(content)
-
+ # Cache (score, best_match) per potential directive so repeated strings skip the edit-distance recalculation.
+ checked_potential_directives = dict()
for filerange, potential_directive in potential_directives:
# TODO(bchetioui): match count directives more finely. We skip directives
# starting with 'CHECK-COUNT-' for the moment as they require more complex
@@ -244,7 +245,11 @@ def find_best_match(typo):
if len(potential_directive) > max(map(len, all_directives)) + threshold:
continue
- score, best_match = find_best_match(potential_directive)
+ if potential_directive not in checked_potential_directives:
+ score, best_match = find_best_match(potential_directive)
+ checked_potential_directives[potential_directive] = (score, best_match)
+ else:
+ score, best_match = checked_potential_directives[potential_directive]
if score == 0: # This is an actual directive, ignore.
continue
elif score <= threshold and best_match not in _ignore:
>From 01bd17177bc2f5692c6dac4d5a834f41bd92dca5 Mon Sep 17 00:00:00 2001
From: klensy <nightouser at gmail.com>
Date: Mon, 3 Jun 2024 17:44:50 +0300
Subject: [PATCH 2/2] build FileRange string lazily
---
llvm/utils/filecheck_lint/filecheck_lint.py | 45 +++++++++++--------
.../filecheck_lint/filecheck_lint_test.py | 18 ++------
2 files changed, 29 insertions(+), 34 deletions(-)
diff --git a/llvm/utils/filecheck_lint/filecheck_lint.py b/llvm/utils/filecheck_lint/filecheck_lint.py
index d238277c9e01b..be84dae4e910d 100755
--- a/llvm/utils/filecheck_lint/filecheck_lint.py
+++ b/llvm/utils/filecheck_lint/filecheck_lint.py
@@ -78,32 +78,39 @@ def levenshtein(s1: str, s2: str) -> int: # pylint: disable=g-doc-args
class FileRange:
- """Stores the coordinates of a span on a single line within a file.
+ """Derives the coordinates of a span on a single line within a file.
Attributes:
- line: the line number
- start_column: the (inclusive) column where the span starts
- end_column: the (inclusive) column where the span ends
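+ content: the full text containing the span (newlines before the span determine its line number)
+ start_byte: the (inclusive) offset into content where the span starts
+ end_byte: the (exclusive) offset into content where the span ends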
"""
- line: int
- start_column: int
- end_column: int
-
- def __init__(
- self, content: str, start_byte: int, end_byte: int
- ): # pylint: disable=g-doc-args
- """Derives a span's coordinates based on a string and start/end bytes.
+ content: str
+ start_byte: int
+ end_byte: int
+ def __init__(self, content: str, start_byte: int, end_byte: int): # pylint: disable=g-doc-args
+ """
`start_byte` and `end_byte` are assumed to be on the same line.
"""
- content_before_span = content[:start_byte]
- self.line = content_before_span.count("\n") + 1
- self.start_column = start_byte - content_before_span.rfind("\n")
- self.end_column = self.start_column + (end_byte - start_byte - 1)
+ self.content = content
+ self.start_byte = start_byte
+ self.end_byte = end_byte
- def __str__(self) -> str:
- return f"{self.line}:{self.start_column}-{self.end_column}"
+ def as_str(self):
+ """
+ Derives the span's "line:start_column-end_column" string from content and the start/end offsets
+
+ start_column: the (inclusive) column where the span starts
+ end_column: the (inclusive) column where the span ends
+ """
+ content_before_span = self.content[: self.start_byte]
+ line = content_before_span.count("\n") + 1
+ start_column = self.start_byte - content_before_span.rfind("\n")
+ end_column = start_column + (self.end_byte - self.start_byte - 1)
+
+ return f"{line}:{start_column}-{end_column}"
class Diagnostic:
@@ -134,7 +141,7 @@ def __init__(
self.fix = fix
def __str__(self) -> str:
- return f"{self.filepath}:" + str(self.filerange) + f": {self.summary()}"
+ return f"{self.filepath}:" + self.filerange.as_str() + f": {self.summary()}"
def summary(self) -> str:
return (
diff --git a/llvm/utils/filecheck_lint/filecheck_lint_test.py b/llvm/utils/filecheck_lint/filecheck_lint_test.py
index 16f381d5b0455..6edcf0abd25a9 100644
--- a/llvm/utils/filecheck_lint/filecheck_lint_test.py
+++ b/llvm/utils/filecheck_lint/filecheck_lint_test.py
@@ -49,27 +49,15 @@ def test_find_potential_directives_comment_prefix(self):
results = list(fcl.find_potential_directives(content))
assert len(results) == 3
pos, match = results[0]
- assert (
- pos.line == 1
- and pos.start_column == len("junk; ") + 1
- and pos.end_column == len(lines[0]) - 1
- )
+ assert pos.as_str() == "1:7-11"
assert match == "CHCK1"
pos, match = results[1]
- assert (
- pos.line == 2
- and pos.start_column == len("junk// ") + 1
- and pos.end_column == len(lines[1]) - 1
- )
+ assert pos.as_str() == "2:8-12"
assert match == "CHCK2"
pos, match = results[2]
- assert (
- pos.line == 3
- and pos.start_column == 1
- and pos.end_column == len(lines[2]) - 1
- )
+ assert pos.as_str() == "3:1-10"
assert match == "SOME CHCK3"
def test_levenshtein(self):
More information about the llvm-commits
mailing list