[PATCH] D142555: Refactor symbol page parsing.
Viktoriia Bakalova via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 25 08:24:43 PST 2023
VitaNuo created this revision.
Herald added a project: All.
VitaNuo requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D142555
Files:
clang/tools/include-mapping/cppreference_parser.py
Index: clang/tools/include-mapping/cppreference_parser.py
===================================================================
--- clang/tools/include-mapping/cppreference_parser.py
+++ clang/tools/include-mapping/cppreference_parser.py
@@ -47,7 +47,7 @@
Returns a list of headers.
"""
- headers = set()
+ symbol_headers = set()
all_headers = set()
soup = BeautifulSoup(symbol_page_html, "html.parser")
@@ -58,31 +58,39 @@
# Defined in header <baz> .t-dsc-header
# decl2 .t-dcl
for table in soup.select('table.t-dcl-begin, table.t-dsc-begin'):
- current_headers = []
- was_decl = False
- for row in table.select('tr'):
- if _HasClass(row, 't-dcl', 't-dsc'):
- was_decl = True
- # Symbols are in the first cell.
- found_symbols = row.find('td').stripped_strings
- if not symbol_name in found_symbols:
- continue
- headers.update(current_headers)
- elif _HasClass(row, 't-dsc-header'):
- # If we saw a decl since the last header, this is a new block of headers
- # for a new block of decls.
- if was_decl:
- current_headers = []
- was_decl = False
+ rows = table.select('tr')
+ i = 0
+ while i < len(rows):
+ start = i
+ current_headers = set()
+ while i < len(rows) and _HasClass(rows[i], 't-dsc-header'):
+ row = rows[i]
# There are also .t-dsc-header for "defined in namespace".
if not "Defined in header " in row.text:
+ i = i + 1
continue
# The interesting header content (e.g. <cstdlib>) is wrapped in <code>.
for header_code in row.find_all("code"):
- current_headers.append(header_code.text)
all_headers.add(header_code.text)
- # If the symbol was never named, consider all named headers.
- return headers or all_headers
+ current_headers.add(header_code.text)
+ i = i + 1
+ # Some tables begin with column-heading rows (.t-dsc-hitem); skip them.
+ while i < len(rows) and _HasClass(rows[i], 't-dsc-hitem'):
+ i = i + 1
+ while i < len(rows) and (_HasClass(rows[i], 't-dcl', 't-dsc') or not rows[i].has_attr("class")):
+ row = rows[i]
+ # Symbols are in the first cell.
+ found_symbols = row.find('td').stripped_strings
+ if symbol_name in found_symbols:
+ for header in current_headers:
+ symbol_headers.add(header)
+ i = i + 1
+ # No header or declaration rows were consumed in this pass; step past the unrecognized row so the loop always advances.
+ if i == start:
+ i = i + 1
+
+ # If the symbol was never named, consider all named headers.
+ return symbol_headers or all_headers
def _ParseIndexPage(index_page_html):
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D142555.492127.patch
Type: text/x-patch
Size: 2708 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20230125/4e610bfd/attachment.bin>
More information about the cfe-commits
mailing list