[clang-tools-extra] r359771 - [clangd] Improvements to header mapping: more precise parsing of cppreference symbol pages.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Thu May 2 02:34:30 PDT 2019
Author: sammccall
Date: Thu May 2 02:34:30 2019
New Revision: 359771
URL: http://llvm.org/viewvc/llvm-project?rev=359771&view=rev
Log:
[clangd] Improvements to header mapping: more precise parsing of cppreference symbol pages.
Summary:
Previously we were just jumping from the symbol index to the symbol page, and
grabbing all the headers mentioned there. But the page often lists multiple
symbols, and so we got false positives and thus ambiguities (which were dropped).
Now we look at which declarations are for the symbol we want, and prefer headers
listed above that symbol. If there are none, we fall back to the old behavior.
Reviewers: kadircet
Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D61316
Modified:
clang-tools-extra/trunk/clangd/StdSymbolMap.inc
clang-tools-extra/trunk/clangd/include-mapping/gen_std.py
clang-tools-extra/trunk/clangd/include-mapping/test.py
Modified: clang-tools-extra/trunk/clangd/StdSymbolMap.inc
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/StdSymbolMap.inc?rev=359771&r1=359770&r2=359771&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/StdSymbolMap.inc (original)
+++ clang-tools-extra/trunk/clangd/StdSymbolMap.inc Thu May 2 02:34:30 2019
@@ -141,12 +141,15 @@ SYMBOL(bad_typeid, std::, <typeinfo>)
SYMBOL(bad_variant_access, std::, <variant>)
SYMBOL(bad_weak_ptr, std::, <memory>)
SYMBOL(basic_common_reference, std::, <type_traits>)
+SYMBOL(basic_filebuf, std::, <fstream>)
SYMBOL(basic_fstream, std::, <fstream>)
SYMBOL(basic_ifstream, std::, <fstream>)
SYMBOL(basic_ios, std::, <ios>)
SYMBOL(basic_iostream, std::, <istream>)
+SYMBOL(basic_istream, std::, <istream>)
SYMBOL(basic_istringstream, std::, <sstream>)
SYMBOL(basic_ofstream, std::, <fstream>)
+SYMBOL(basic_ostream, std::, <ostream>)
SYMBOL(basic_ostringstream, std::, <sstream>)
SYMBOL(basic_osyncstream, std::, <syncstream>)
SYMBOL(basic_regex, std::, <regex>)
@@ -200,6 +203,7 @@ SYMBOL(cmatch, std::, <regex>)
SYMBOL(codecvt, std::, <locale>)
SYMBOL(codecvt_base, std::, <locale>)
SYMBOL(codecvt_byname, std::, <locale>)
+SYMBOL(codecvt_mode, std::, <codecvt>)
SYMBOL(codecvt_utf16, std::, <codecvt>)
SYMBOL(codecvt_utf8, std::, <codecvt>)
SYMBOL(codecvt_utf8_utf16, std::, <codecvt>)
@@ -254,6 +258,7 @@ SYMBOL(declare_no_pointers, std::, <memo
SYMBOL(declare_reachable, std::, <memory>)
SYMBOL(declval, std::, <utility>)
SYMBOL(default_delete, std::, <memory>)
+SYMBOL(default_random_engine, std::, <random>)
SYMBOL(default_searcher, std::, <functional>)
SYMBOL(defaultfloat, std::, <ios>)
SYMBOL(defer_lock, std::, <mutex>)
@@ -273,8 +278,10 @@ SYMBOL(discrete_distribution, std::, <ra
SYMBOL(disjunction, std::, <type_traits>)
SYMBOL(disjunction_v, std::, <type_traits>)
SYMBOL(distance, std::, <iterator>)
+SYMBOL(div_t, std::, <cstdlib>)
SYMBOL(divides, std::, <functional>)
SYMBOL(domain_error, std::, <stdexcept>)
+SYMBOL(double_t, std::, <cmath>)
SYMBOL(dynamic_extent, std::, <span>)
SYMBOL(dynamic_pointer_cast, std::, <memory>)
SYMBOL(emit_on_flush, std::, <ostream>)
@@ -333,6 +340,7 @@ SYMBOL(fgetpos, std::, <cstdio>)
SYMBOL(fgets, std::, <cstdio>)
SYMBOL(fgetwc, std::, <cwchar>)
SYMBOL(fgetws, std::, <cwchar>)
+SYMBOL(filebuf, std::, <streambuf>)
SYMBOL(fill, std::, <algorithm>)
SYMBOL(fill_n, std::, <algorithm>)
SYMBOL(find, std::, <algorithm>)
@@ -344,6 +352,7 @@ SYMBOL(fisher_f_distribution, std::, <ra
SYMBOL(fixed, std::, <ios>)
SYMBOL(float_denorm_style, std::, <limits>)
SYMBOL(float_round_style, std::, <limits>)
+SYMBOL(float_t, std::, <cmath>)
SYMBOL(floor, std::, <cmath>)
SYMBOL(floor2, std::, <bit>)
SYMBOL(flush, std::, <ostream>)
@@ -432,6 +441,9 @@ SYMBOL(ifstream, std::, <fstream>)
SYMBOL(ignore, std::, <tuple>)
SYMBOL(ilogb, std::, <cmath>)
SYMBOL(imag, std::, <complex>)
+SYMBOL(imaxabs, std::, <cinttypes>)
+SYMBOL(imaxdiv, std::, <cinttypes>)
+SYMBOL(imaxdiv_t, std::, <cinttypes>)
SYMBOL(in_place, std::, <utility>)
SYMBOL(in_place_index, std::, <utility>)
SYMBOL(in_place_index_t, std::, <utility>)
@@ -450,6 +462,8 @@ SYMBOL(inserter, std::, <iterator>)
SYMBOL(integer_sequence, std::, <utility>)
SYMBOL(integral_constant, std::, <type_traits>)
SYMBOL(internal, std::, <ios>)
+SYMBOL(intmax_t, std::, <cstdint>)
+SYMBOL(intptr_t, std::, <cstdint>)
SYMBOL(invalid_argument, std::, <stdexcept>)
SYMBOL(invoke, std::, <functional>)
SYMBOL(invoke_result, std::, <type_traits>)
@@ -639,6 +653,7 @@ SYMBOL(ispow2, std::, <bit>)
SYMBOL(isprint, std::, <cctype>)
SYMBOL(ispunct, std::, <cctype>)
SYMBOL(isspace, std::, <cctype>)
+SYMBOL(istream, std::, <istream>)
SYMBOL(istream_iterator, std::, <iterator>)
SYMBOL(istreambuf_iterator, std::, <iterator>)
SYMBOL(istringstream, std::, <sstream>)
@@ -664,11 +679,15 @@ SYMBOL(iterator_traits, std::, <iterator
SYMBOL(jmp_buf, std::, <csetjmp>)
SYMBOL(kill_dependency, std::, <atomic>)
SYMBOL(kilo, std::, <ratio>)
+SYMBOL(knuth_b, std::, <random>)
+SYMBOL(labs, std::, <cstdlib>)
SYMBOL(launch, std::, <future>)
SYMBOL(launder, std::, <new>)
SYMBOL(lcm, std::, <numeric>)
SYMBOL(lconv, std::, <clocale>)
SYMBOL(ldexp, std::, <cmath>)
+SYMBOL(ldiv, std::, <cstdlib>)
+SYMBOL(ldiv_t, std::, <cstdlib>)
SYMBOL(left, std::, <ios>)
SYMBOL(length_error, std::, <stdexcept>)
SYMBOL(less, std::, <functional>)
@@ -678,6 +697,9 @@ SYMBOL(lexicographical_compare_3way, std
SYMBOL(lgamma, std::, <cmath>)
SYMBOL(linear_congruential_engine, std::, <random>)
SYMBOL(list, std::, <list>)
+SYMBOL(llabs, std::, <cstdlib>)
+SYMBOL(lldiv, std::, <cstdlib>)
+SYMBOL(lldiv_t, std::, <cstdlib>)
SYMBOL(llrint, std::, <cmath>)
SYMBOL(llround, std::, <cmath>)
SYMBOL(locale, std::, <locale>)
@@ -755,6 +777,8 @@ SYMBOL(min, std::, <algorithm>)
SYMBOL(min_element, std::, <algorithm>)
SYMBOL(minmax, std::, <algorithm>)
SYMBOL(minmax_element, std::, <algorithm>)
+SYMBOL(minstd_rand, std::, <random>)
+SYMBOL(minstd_rand0, std::, <random>)
SYMBOL(minus, std::, <functional>)
SYMBOL(mismatch, std::, <algorithm>)
SYMBOL(mktime, std::, <ctime>)
@@ -769,6 +793,8 @@ SYMBOL(monostate, std::, <variant>)
SYMBOL(move_backward, std::, <algorithm>)
SYMBOL(move_if_noexcept, std::, <utility>)
SYMBOL(move_iterator, std::, <iterator>)
+SYMBOL(mt19937, std::, <random>)
+SYMBOL(mt19937_64, std::, <random>)
SYMBOL(multimap, std::, <map>)
SYMBOL(multiplies, std::, <functional>)
SYMBOL(multiset, std::, <set>)
@@ -817,6 +843,7 @@ SYMBOL(oct, std::, <ios>)
SYMBOL(ofstream, std::, <fstream>)
SYMBOL(once_flag, std::, <mutex>)
SYMBOL(optional, std::, <optional>)
+SYMBOL(ostream, std::, <ostream>)
SYMBOL(ostream_iterator, std::, <iterator>)
SYMBOL(ostreambuf_iterator, std::, <iterator>)
SYMBOL(ostringstream, std::, <sstream>)
@@ -876,6 +903,10 @@ SYMBOL(random_shuffle, std::, <algorithm
SYMBOL(range_error, std::, <stdexcept>)
SYMBOL(rank, std::, <type_traits>)
SYMBOL(rank_v, std::, <type_traits>)
+SYMBOL(ranlux24, std::, <random>)
+SYMBOL(ranlux24_base, std::, <random>)
+SYMBOL(ranlux48, std::, <random>)
+SYMBOL(ranlux48_base, std::, <random>)
SYMBOL(ratio, std::, <ratio>)
SYMBOL(ratio_add, std::, <ratio>)
SYMBOL(ratio_divide, std::, <ratio>)
@@ -1119,6 +1150,8 @@ SYMBOL(u16string_view, std::, <string_vi
SYMBOL(u32streampos, std::, <ios>)
SYMBOL(u32string, std::, <string>)
SYMBOL(u32string_view, std::, <string_view>)
+SYMBOL(uintmax_t, std::, <cstdint>)
+SYMBOL(uintptr_t, std::, <cstdint>)
SYMBOL(uncaught_exceptions, std::, <exception>)
SYMBOL(undeclare_no_pointers, std::, <memory>)
SYMBOL(undeclare_reachable, std::, <memory>)
@@ -1218,17 +1251,21 @@ SYMBOL(wcsxfrm, std::, <cwchar>)
SYMBOL(wctob, std::, <cwchar>)
SYMBOL(wctomb, std::, <cstdlib>)
SYMBOL(wctrans, std::, <cwctype>)
+SYMBOL(wctrans_t, std::, <cwctype>)
SYMBOL(wctype, std::, <cwctype>)
+SYMBOL(wctype_t, std::, <cwctype>)
SYMBOL(weak_equal, std::, <compare>)
SYMBOL(weak_equality, std::, <compare>)
SYMBOL(weak_order, std::, <compare>)
SYMBOL(weak_ordering, std::, <compare>)
SYMBOL(weak_ptr, std::, <memory>)
SYMBOL(weibull_distribution, std::, <random>)
+SYMBOL(wfilebuf, std::, <streambuf>)
SYMBOL(wfstream, std::, <fstream>)
SYMBOL(wifstream, std::, <fstream>)
SYMBOL(wios, std::, <ios>)
SYMBOL(wiostream, std::, <istream>)
+SYMBOL(wistream, std::, <istream>)
SYMBOL(wistringstream, std::, <sstream>)
SYMBOL(wmemchr, std::, <cwchar>)
SYMBOL(wmemcmp, std::, <cwchar>)
@@ -1236,6 +1273,7 @@ SYMBOL(wmemcpy, std::, <cwchar>)
SYMBOL(wmemmove, std::, <cwchar>)
SYMBOL(wmemset, std::, <cwchar>)
SYMBOL(wofstream, std::, <fstream>)
+SYMBOL(wostream, std::, <ostream>)
SYMBOL(wostringstream, std::, <sstream>)
SYMBOL(wosyncstream, std::, <syncstream>)
SYMBOL(wprintf, std::, <cwchar>)
Modified: clang-tools-extra/trunk/clangd/include-mapping/gen_std.py
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/include-mapping/gen_std.py?rev=359771&r1=359770&r2=359771&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/include-mapping/gen_std.py (original)
+++ clang-tools-extra/trunk/clangd/include-mapping/gen_std.py Thu May 2 02:34:30 2019
@@ -35,6 +35,7 @@ import collections
import datetime
import multiprocessing
import os
+import re
import signal
import sys
@@ -50,7 +51,13 @@ STDGEN_CODE_PREFIX = """\
//===----------------------------------------------------------------------===//
"""
-def ParseSymbolPage(symbol_page_html):
+def HasClass(tag, *classes):
+ for c in tag.get('class', []):
+ if c in classes:
+ return True
+ return False
+
+def ParseSymbolPage(symbol_page_html, symbol_name):
"""Parse symbol page and retrieve the include header defined in this page.
The symbol page provides header for the symbol, specifically in
"Defined in header <header>" section. An example:
@@ -61,17 +68,43 @@ def ParseSymbolPage(symbol_page_html):
Returns a list of headers.
"""
- headers = []
+ headers = set()
+ all_headers = set()
soup = BeautifulSoup(symbol_page_html, "html.parser")
- # "Defined in header " are defined in <tr class="t-dsc-header"> or
- # <tr class="t-dcl-header">.
- for header_tr in soup.select('tr.t-dcl-header,tr.t-dsc-header'):
- if "Defined in header " in header_tr.text:
- # The interesting header content (e.g. <cstdlib>) is wrapped in <code>.
- for header_code in header_tr.find_all("code"):
- headers.append(header_code.text)
- return headers
+ # Rows in table are like:
+ # Defined in header <foo> .t-dsc-header
+ # Defined in header <bar> .t-dsc-header
+ # decl1 .t-dcl
+ # Defined in header <baz> .t-dsc-header
+ # decl2 .t-dcl
+ for table in soup.select('table.t-dcl-begin, table.t-dsc-begin'):
+ current_headers = []
+ was_decl = False
+ for row in table.select('tr'):
+ if HasClass(row, 't-dcl', 't-dsc'):
+ was_decl = True
+ # Declaration is in the first cell.
+ text = row.find('td').text
+ # Decl may not be for the symbol name we're looking for.
+ if not re.search("\\b%s\\b" % symbol_name, text):
+ continue
+ headers.update(current_headers)
+ elif HasClass(row, 't-dsc-header'):
+ # If we saw a decl since the last header, this is a new block of headers
+ # for a new block of decls.
+ if was_decl:
+ current_headers = []
+ was_decl = False
+ # There are also .t-dsc-header for "defined in namespace".
+ if not "Defined in header " in row.text:
+ continue
+ # The interesting header content (e.g. <cstdlib>) is wrapped in <code>.
+ for header_code in row.find_all("code"):
+ current_headers.append(header_code.text)
+ all_headers.add(header_code.text)
+ # If the symbol was never named, consider all named headers.
+ return headers or all_headers
def ParseIndexPage(index_page_html):
@@ -112,7 +145,7 @@ class Symbol:
def ReadSymbolPage(path, name):
with open(path) as f:
- return ParseSymbolPage(f.read())
+ return ParseSymbolPage(f.read(), name)
def GetSymbols(pool, root_dir, index_page_name, namespace):
Modified: clang-tools-extra/trunk/clangd/include-mapping/test.py
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/include-mapping/test.py?rev=359771&r1=359770&r2=359771&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/include-mapping/test.py (original)
+++ clang-tools-extra/trunk/clangd/include-mapping/test.py Thu May 2 02:34:30 2019
@@ -47,9 +47,13 @@ class TestStdGen(unittest.TestCase):
<td></td>
<td></td>
</tr>
+ <tr class="t-dcl">
+ <td>void foo()</td>
+ <td>this is matched</td>
+ </tr>
</tbody></table>
"""
- self.assertEqual(ParseSymbolPage(html), ['<cmath>'])
+ self.assertEqual(ParseSymbolPage(html, 'foo'), set(['<cmath>']))
def testParseSymbolPage_MulHeaders(self):
@@ -64,6 +68,10 @@ class TestStdGen(unittest.TestCase):
<td></td>
<td></td>
</tr>
+ <tr class="t-dcl">
+ <td>void bar()</td>
+ <td>this mentions foo, but isn't matched</td>
+ </tr>
<tr class="t-dsc-header">
<td> <div>Defined in header <code><a href="cstdio.html" title="cstdio">&lt;cstdio&gt;</a></code>
</div></td>
@@ -76,10 +84,14 @@ class TestStdGen(unittest.TestCase):
<td></td>
<td></td>
</tr>
+ <tr class="t-dcl">
+ <td>void foo()</td>
+ <td>this is matched</td>
+ </tr>
</tbody></table>
"""
- self.assertEqual(ParseSymbolPage(html),
- ['<cstddef>', '<cstdio>', '<cstdlib>'])
+ self.assertEqual(ParseSymbolPage(html, "foo"),
+ set(['<cstdio>', '<cstdlib>']))
def testParseSymbolPage_MulHeadersInSameDiv(self):
@@ -87,6 +99,7 @@ class TestStdGen(unittest.TestCase):
# Defined in header <algorithm>
# Defined in header <utility>
html = """
+<table class="t-dcl-begin"><tbody>
<tr class="t-dsc-header">
<td><div>
Defined in header <code><a href="../header/algorithm.html" title="cpp/header/algorithm">&lt;algorithm&gt;</a></code><br>
@@ -94,8 +107,14 @@ class TestStdGen(unittest.TestCase):
</div></td>
<td></td>
</tr>
+<tr class="t-dcl">
+ <td>void foo()</td>
+ <td>this is matched</td>
+</tr>
+</tbody></table>
"""
- self.assertEqual(ParseSymbolPage(html), ['<algorithm>', '<utility>'])
+ self.assertEqual(ParseSymbolPage(html, "foo"),
+ set(['<algorithm>', '<utility>']))
if __name__ == '__main__':
More information about the cfe-commits
mailing list