[llvm-branch-commits] [clang-tools-extra] 85c1c6a - [clangd] Add Random Forest runtime for code completion.
Eric Christopher via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Sep 18 14:51:19 PDT 2020
Hi Utkarsh,
I've temporarily reverted this here:
echristo at athyra ~/s/llvm-project (master)> git push
To github.com:llvm/llvm-project.git
1f0b43638ed..549e55b3d56 master -> master
The decision forest header file referenced in the unittest doesn't appear
to have made it into the commit?
Thanks and feel free to follow up if I've missed something.
-eric
On Fri, Sep 18, 2020 at 12:38 PM Utkarsh Saxena via llvm-branch-commits <
llvm-branch-commits at lists.llvm.org> wrote:
>
> Author: Utkarsh Saxena
> Date: 2020-09-18T18:27:42+02:00
> New Revision: 85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a
>
> URL:
> https://github.com/llvm/llvm-project/commit/85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a
> DIFF:
> https://github.com/llvm/llvm-project/commit/85c1c6a4ba4eebbd3f5cefb1512498b9f8a5bb7a.diff
>
> LOG: [clangd] Add Random Forest runtime for code completion.
>
> Summary:
> [WIP]
> - Proposes a json format for representing Random Forest model.
> - Proposes a way to test the generated runtime using a test model.
>
> TODO:
> - Add generated source code snippet for easier review.
> - Fix unused label warning.
> - Figure out required using declarations for CATEGORICAL columns from
> Features.json.
> - Necessary Google3 internal modifications for blaze before landing.
> - Add documentation for format of the model.
> - Document more.
>
> Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, arphaman, kadircet,
> cfe-commits
>
> Tags: #clang
>
> Differential Revision: https://reviews.llvm.org/D83814
>
> Added:
> clang-tools-extra/clangd/quality/CompletionModel.cmake
> clang-tools-extra/clangd/quality/CompletionModelCodegen.py
> clang-tools-extra/clangd/quality/README.md
> clang-tools-extra/clangd/quality/model/features.json
> clang-tools-extra/clangd/quality/model/forest.json
> clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
>
> clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
> clang-tools-extra/clangd/unittests/decision_forest_model/features.json
> clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
>
> Modified:
> clang-tools-extra/clangd/CMakeLists.txt
> clang-tools-extra/clangd/unittests/CMakeLists.txt
> clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
>
> Removed:
>
>
>
>
> ################################################################################
> diff --git a/clang-tools-extra/clangd/CMakeLists.txt
> b/clang-tools-extra/clangd/CMakeLists.txt
> index 3a1a034ed17b..9d2ab5be222a 100644
> --- a/clang-tools-extra/clangd/CMakeLists.txt
> +++ b/clang-tools-extra/clangd/CMakeLists.txt
> @@ -28,6 +28,9 @@ set(LLVM_LINK_COMPONENTS
> FrontendOpenMP
> Option
> )
> +
> +include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake)
> +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model
> CompletionModel clang::clangd::Example)
>
> if(MSVC AND NOT CLANG_CL)
> set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS
> -wd4130) # disables C4130: logical operation on address of string constant
> @@ -77,6 +80,7 @@ add_clang_library(clangDaemon
> TUScheduler.cpp
> URI.cpp
> XRefs.cpp
> + ${CMAKE_CURRENT_BINARY_DIR}/CompletionModel.cpp
>
> index/Background.cpp
> index/BackgroundIndexLoader.cpp
> @@ -117,6 +121,11 @@ add_clang_library(clangDaemon
> omp_gen
> )
>
> +# Include generated CompletionModel headers.
> +target_include_directories(clangDaemon PUBLIC
> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
> +)
> +
> clang_target_link_libraries(clangDaemon
> PRIVATE
> clangAST
>
> diff --git a/clang-tools-extra/clangd/quality/CompletionModel.cmake
> b/clang-tools-extra/clangd/quality/CompletionModel.cmake
> new file mode 100644
> index 000000000000..60c6d2aa8433
> --- /dev/null
> +++ b/clang-tools-extra/clangd/quality/CompletionModel.cmake
> @@ -0,0 +1,37 @@
> +# Run the Completion Model Codegenerator on the model present in the
> +# ${model} directory.
> +# Produces a pair of files called ${filename}.h and ${filename}.cpp in
> the
> +# ${CMAKE_CURRENT_BINARY_DIR}. The generated header
> +# will define a C++ class called ${cpp_class} - which may be a
> +# namespace-qualified class name.
> +function(gen_decision_forest model filename cpp_class)
> + set(model_compiler
> ${CMAKE_SOURCE_DIR}/../clang-tools-extra/clangd/quality/CompletionModelCodegen.py)
> +
> + set(output_dir ${CMAKE_CURRENT_BINARY_DIR})
> + set(header_file ${output_dir}/${filename}.h)
> + set(cpp_file ${output_dir}/${filename}.cpp)
> +
> + add_custom_command(OUTPUT ${header_file} ${cpp_file}
> + COMMAND "${Python3_EXECUTABLE}" ${model_compiler}
> + --model ${model}
> + --output_dir ${output_dir}
> + --filename ${filename}
> + --cpp_class ${cpp_class}
> + COMMENT "Generating code completion model runtime..."
> + DEPENDS ${model_compiler} ${model}/forest.json ${model}/features.json
> + VERBATIM )
> +
> + set_source_files_properties(${header_file} PROPERTIES
> + GENERATED 1)
> + set_source_files_properties(${cpp_file} PROPERTIES
> + GENERATED 1)
> +
> + # Disable unused label warning for generated files.
> + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
> + set_source_files_properties(${cpp_file} PROPERTIES
> + COMPILE_FLAGS /wd4102)
> + else()
> + set_source_files_properties(${cpp_file} PROPERTIES
> + COMPILE_FLAGS -Wno-unused)
> + endif()
> +endfunction()
>
> diff --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
> b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
> new file mode 100644
> index 000000000000..8f8234f6ebbc
> --- /dev/null
> +++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
> @@ -0,0 +1,283 @@
> +"""Code generator for Code Completion Model Inference.
> +
> +Tool runs on the Decision Forest model defined in {model} directory.
> +It generates two files: {output_dir}/{filename}.h and
> {output_dir}/{filename}.cpp
> +The generated files defines the Example class named {cpp_class} having
> all the features as class members.
> +The generated runtime provides an `Evaluate` function which can be used
> to score a code completion candidate.
> +"""
> +
> +import argparse
> +import json
> +import struct
> +from enum import Enum
> +
> +
> +class CppClass:
> + """Holds class name and names of the enclosing namespaces."""
> +
> + def __init__(self, cpp_class):
> + ns_and_class = cpp_class.split("::")
> + self.ns = [ns for ns in ns_and_class[0:-1] if len(ns) > 0]
> + self.name = ns_and_class[-1]
> + if len(self.name) == 0:
> + raise ValueError("Empty class name.")
> +
> + def ns_begin(self):
> + """Returns snippet for opening namespace declarations."""
> + open_ns = [f"namespace {ns} {{" for ns in self.ns]
> + return "\n".join(open_ns)
> +
> + def ns_end(self):
> + """Returns snippet for closing namespace declarations."""
> + close_ns = [
> + f"}} // namespace {ns}" for ns in reversed(self.ns)]
> + return "\n".join(close_ns)
> +
> +
> +def header_guard(filename):
> + '''Returns the header guard for the generated header.'''
> + return f"GENERATED_DECISION_FOREST_MODEL_{filename.upper()}_H"
> +
> +
> +def boost_node(n, label, next_label):
> + """Returns code snippet for a leaf/boost node.
> + Adds value of leaf to the score and jumps to the root of the next
> tree."""
> + return f"{label}: Score += {n['score']}; goto {next_label};"
> +
> +
> +def if_greater_node(n, label, next_label):
> + """Returns code snippet for a if_greater node.
> + Jumps to true_label if the Example feature (NUMBER) is greater than
> the threshold.
> + Comparing integers is much faster than comparing floats. Assuming
> floating points
> + are represented as IEEE 754, it order-encodes the floats to integers
> before comparing them.
> + Control falls through if condition is evaluated to false."""
> + threshold = n["threshold"]
> + return f"{label}: if (E.{n['feature']} >= {order_encode(threshold)}
> /*{threshold}*/) goto {next_label};"
> +
> +
> +def if_member_node(n, label, next_label):
> + """Returns code snippet for a if_member node.
> + Jumps to true_label if the Example feature (ENUM) is present in the
> set of enum values
> + described in the node.
> + Control falls through if condition is evaluated to false."""
> + members = '|'.join([
> + f"BIT({n['feature']}_type::{member})"
> + for member in n["set"]
> + ])
> + return f"{label}: if (E.{n['feature']} & ({members})) goto
> {next_label};"
> +
> +
> +def node(n, label, next_label):
> + """Returns code snippet for the node."""
> + return {
> + 'boost': boost_node,
> + 'if_greater': if_greater_node,
> + 'if_member': if_member_node,
> + }[n['operation']](n, label, next_label)
> +
> +
> +def tree(t, tree_num: int, node_num: int):
> + """Returns code for inferencing a Decision Tree.
> + Also returns the size of the decision tree.
> +
> + A tree starts with its label `t{tree#}`.
> + A node of the tree starts with label `t{tree#}_n{node#}`.
> +
> + The tree contains two types of node: Conditional node and Leaf node.
> + - Conditional node evaluates a condition. If true, it jumps to the
> true node/child.
> + Code is generated using pre-order traversal of the tree
> considering
> + false node as the first child. Therefore the false node is always
> the
> + immediately next label.
> + - Leaf node adds the value to the score and jumps to the next tree.
> + """
> + label = f"t{tree_num}_n{node_num}"
> + code = []
> + if node_num == 0:
> + code.append(f"t{tree_num}:")
> +
> + if t["operation"] == "boost":
> + code.append(node(t, label=label, next_label=f"t{tree_num+1}"))
> + return code, 1
> +
> + false_code, false_size = tree(
> + t['else'], tree_num=tree_num, node_num=node_num+1)
> +
> + true_node_num = node_num+false_size+1
> + true_label = f"t{tree_num}_n{true_node_num}"
> +
> + true_code, true_size = tree(
> + t['then'], tree_num=tree_num, node_num=true_node_num)
> +
> + code.append(node(t, label=label, next_label=true_label))
> +
> + return code+false_code+true_code, 1+false_size+true_size
> +
> +
> +def gen_header_code(features_json: list, cpp_class, filename: str):
> + """Returns code for header declaring the inference runtime.
> +
> + Declares the Example class named {cpp_class} inside relevant
> namespaces.
> + The Example class contains all the features as class members. This
> + class can be used to represent a code completion candidate.
> + Provides `float Evaluate()` function which can be used to score the
> Example.
> + """
> + setters = []
> + for f in features_json:
> + feature = f["name"]
> + if f["kind"] == "NUMBER":
> + # Floats are order-encoded to integers for faster comparison.
> + setters.append(
> + f"void set{feature}(float V) {{ {feature} =
> OrderEncode(V); }}")
> + elif f["kind"] == "ENUM":
> + setters.append(
> + f"void set{feature}(unsigned V) {{ {feature} = 1 << V;
> }}")
> + else:
> + raise ValueError("Unhandled feature type.", f["kind"])
> +
> + # Class members represent all the features of the Example.
> + class_members = [f"uint32_t {f['name']} = 0;" for f in features_json]
> +
> + nline = "\n "
> + guard = header_guard(filename)
> + return f"""#ifndef {guard}
> +#define {guard}
> +#include <cstdint>
> +
> +{cpp_class.ns_begin()}
> +class {cpp_class.name} {{
> +public:
> + {nline.join(setters)}
> +
> +private:
> + {nline.join(class_members)}
> +
> + // Produces an integer that sorts in the same order as F.
> + // That is: a < b <==> orderEncode(a) < orderEncode(b).
> + static uint32_t OrderEncode(float F);
> + friend float Evaluate(const {cpp_class.name}&);
> +}};
> +
> +float Evaluate(const {cpp_class.name}&);
> +{cpp_class.ns_end()}
> +#endif // {guard}
> +"""
> +
> +
> +def order_encode(v: float):
> + i = struct.unpack('<I', struct.pack('<f', v))[0]
> + TopBit = 1 << 31
> + # IEEE 754 floats compare like sign-magnitude integers.
> + if (i & TopBit): # Negative float
> + return (1 << 32) - i # low half of integers, order reversed.
> + return TopBit + i # top half of integers
> +
> +
> +def evaluate_func(forest_json: list, cpp_class: CppClass):
> + """Generates code for `float Evaluate(const {Example}&)` function.
> + The generated function can be used to score an Example."""
> + code = f"float Evaluate(const {cpp_class.name}& E) {{\n"
> + lines = []
> + lines.append("float Score = 0;")
> + tree_num = 0
> + for tree_json in forest_json:
> + lines.extend(tree(tree_json, tree_num=tree_num, node_num=0)[0])
> + lines.append("")
> + tree_num += 1
> +
> + lines.append(f"t{len(forest_json)}: // No such tree.")
> + lines.append("return Score;")
> + code += " " + "\n ".join(lines)
> + code += "\n}"
> + return code
> +
> +
> +def gen_cpp_code(forest_json: list, features_json: list, filename: str,
> + cpp_class: CppClass):
> + """Generates code for the .cpp file."""
> + # Headers
> + # Required by OrderEncode(float F).
> + angled_include = [
> + f'#include <{h}>'
> + for h in ["cstring", "limits"]
> + ]
> +
> + # Include generated header.
> + qouted_headers = {f"{filename}.h", "llvm/ADT/bit.h"}
> + # Headers required by ENUM features used by the model.
> + qouted_headers |= {f["header"]
> + for f in features_json if f["kind"] == "ENUM"}
> + quoted_include = [f'#include "{h}"' for h in sorted(qouted_headers)]
> +
> + # using-decl for ENUM features.
> + using_decls = "\n".join(f"using {feature['name']}_type =
> {feature['type']};"
> + for feature in features_json
> + if feature["kind"] == "ENUM")
> + nl = "\n"
> + return f"""{nl.join(angled_include)}
> +
> +{nl.join(quoted_include)}
> +
> +#define BIT(X) (1 << X)
> +
> +{cpp_class.ns_begin()}
> +
> +{using_decls}
> +
> +uint32_t {cpp_class.name}::OrderEncode(float F) {{
> + static_assert(std::numeric_limits<float>::is_iec559, "");
> + constexpr uint32_t TopBit = ~(~uint32_t{{0}} >> 1);
> +
> + // Get the bits of the float. Endianness is the same as for integers.
> + uint32_t U = llvm::bit_cast<uint32_t>(F);
> + std::memcpy(&U, &F, sizeof(U));
> + // IEEE 754 floats compare like sign-magnitude integers.
> + if (U & TopBit) // Negative float.
> + return 0 - U; // Map onto the low half of integers, order reversed.
> + return U + TopBit; // Positive floats map onto the high half of
> integers.
> +}}
> +
> +{evaluate_func(forest_json, cpp_class)}
> +{cpp_class.ns_end()}
> +"""
> +
> +
> +def main():
> + parser = argparse.ArgumentParser('DecisionForestCodegen')
> + parser.add_argument('--filename', help='output file name.')
> + parser.add_argument('--output_dir', help='output directory.')
> + parser.add_argument('--model', help='path to model directory.')
> + parser.add_argument(
> + '--cpp_class',
> + help='The name of the class (which may be a namespace-qualified)
> created in generated header.'
> + )
> + ns = parser.parse_args()
> +
> + output_dir = ns.output_dir
> + filename = ns.filename
> + header_file = f"{output_dir}/{filename}.h"
> + cpp_file = f"{output_dir}/{filename}.cpp"
> + cpp_class = CppClass(cpp_class=ns.cpp_class)
> +
> + model_file = f"{ns.model}/forest.json"
> + features_file = f"{ns.model}/features.json"
> +
> + with open(features_file) as f:
> + features_json = json.load(f)
> +
> + with open(model_file) as m:
> + forest_json = json.load(m)
> +
> + with open(cpp_file, 'w+t') as output_cc:
> + output_cc.write(
> + gen_cpp_code(forest_json=forest_json,
> + features_json=features_json,
> + filename=filename,
> + cpp_class=cpp_class))
> +
> + with open(header_file, 'w+t') as output_h:
> + output_h.write(gen_header_code(
> + features_json=features_json, cpp_class=cpp_class,
> filename=filename))
> +
> +
> +if __name__ == '__main__':
> + main()
>
> diff --git a/clang-tools-extra/clangd/quality/README.md
> b/clang-tools-extra/clangd/quality/README.md
> new file mode 100644
> index 000000000000..36fa37320e54
> --- /dev/null
> +++ b/clang-tools-extra/clangd/quality/README.md
> @@ -0,0 +1,220 @@
> +# Decision Forest Code Completion Model
> +
> +## Decision Forest
> +A **decision forest** is a collection of many decision trees. A
> **decision tree** is a full binary tree that provides a quality prediction
> for an input (code completion item). Internal nodes represent a **binary
> decision** based on the input data, and leaf nodes represent a prediction.
> +
> +In order to predict the relevance of a code completion item, we traverse
> each of the decision trees beginning with their roots until we reach a
> leaf.
> +
> +An input (code completion candidate) is characterized as a set of
> **features**, such as the *type of symbol* or the *number of existing
> references*.
> +
> +At every non-leaf node, we evaluate the condition to decide whether to go
> left or right. The condition compares one **feature** of the input against a
> constant. The condition can be of two types:
> +- **if_greater**: Checks whether a numerical feature is **>=** a
> **threshold**.
> +- **if_member**: Checks whether the **enum** feature is contained in the
> **set** defined in the node.
> +
> +A leaf node contains the value **score**.
> +To compute an overall **quality** score, we traverse each tree in this
> way and add up the scores.
> +
> +## Model Input Format
> +The input model is represented in json format.
> +
> +### Features
> +The file **features.json** defines the features available to the model.
> +It is a json list of features. The features can be of following two kinds.
> +
> +#### Number
> +```
> +{
> + "name": "a_numerical_feature",
> + "kind": "NUMBER"
> +}
> +```
> +#### Enum
> +```
> +{
> + "name": "an_enum_feature",
> + "kind": "ENUM",
> + "type": "fully::qualified::enum",
> + "header": "path/to/HeaderDeclaringEnum.h"
> +}
> +```
> +The field `type` specifies the fully qualified name of the enum.
> +The maximum cardinality of the enum can be **32**.
> +
> +The field `header` specifies the header containing the declaration of the
> enum.
> +This header is included by the inference runtime.
> +
> +
> +### Decision Forest
> +The file `forest.json` defines the decision forest. It is a json list of
> **DecisionTree**.
> +
> +**DecisionTree** is one of **IfGreaterNode**, **IfMemberNode**,
> **LeafNode**.
> +#### IfGreaterNode
> +```
> +{
> + "operation": "if_greater",
> + "feature": "a_numerical_feature",
> + "threshold": A real number,
> + "then": {A DecisionTree},
> + "else": {A DecisionTree}
> +}
> +```
> +#### IfMemberNode
> +```
> +{
> + "operation": "if_member",
> + "feature": "an_enum_feature",
> + "set": ["enum_value1", "enum_value2", ...],
> + "then": {A DecisionTree},
> + "else": {A DecisionTree}
> +}
> +```
> +#### LeafNode
> +```
> +{
> + "operation": "boost",
> + "score": A real number
> +}
> +```
> +
> +## Code Generator for Inference
> +The implementation of inference runtime is split across:
> +
> +### Code generator
> +The code generator `CompletionModelCodegen.py` takes the `${model}`
> directory as input and generates the inference library:
> +- `${output_dir}/{filename}.h`
> +- `${output_dir}/{filename}.cpp`
> +
> +Invocation
> +```
> +python3 CompletionModelCodegen.py \
> + --model path/to/model/dir \
> + --output_dir path/to/output/dir \
> + --filename OutputFileName \
> + --cpp_class clang::clangd::YourExampleClass
> +```
> +### Build System
> +`CompletionModel.cmake` provides the `gen_decision_forest` function.
> +A client intending to use the CompletionModel for inference can use this to
> trigger the code generator and generate the inference library.
> +It can then use the generated API by including and depending on this
> library.
> +
> +### Generated API for inference
> +The code generator defines the Example `class` inside relevant namespaces
> as specified in option `${cpp_class}`.
> +
> +The members of this generated class comprise all the features mentioned
> in `features.json`.
> +Thus this class can represent a code completion candidate that needs to
> be scored.
> +
> +The API also provides `float Evaluate(const MyClass&)` which can be used
> to score the completion candidate.
> +
> +
> +## Example
> +### model/features.json
> +```
> +[
> + {
> + "name": "ANumber",
> + "kind": "NUMBER"
> + },
> + {
> + "name": "AFloat",
> + "kind": "NUMBER"
> + },
> + {
> + "name": "ACategorical",
> + "kind": "ENUM",
> + "type": "ns1::ns2::TestEnum",
> + "header": "model/CategoricalFeature.h"
> + }
> +]
> +```
> +### model/forest.json
> +```
> +[
> + {
> + "operation": "if_greater",
> + "feature": "ANumber",
> + "threshold": 200.0,
> + "then": {
> + "operation": "if_greater",
> + "feature": "AFloat",
> + "threshold": -1,
> + "then": {
> + "operation": "boost",
> + "score": 10.0
> + },
> + "else": {
> + "operation": "boost",
> + "score": -20.0
> + }
> + },
> + "else": {
> + "operation": "if_member",
> + "feature": "ACategorical",
> + "set": [
> + "A",
> + "C"
> + ],
> + "then": {
> + "operation": "boost",
> + "score": 3.0
> + },
> + "else": {
> + "operation": "boost",
> + "score": -4.0
> + }
> + }
> + },
> + {
> + "operation": "if_member",
> + "feature": "ACategorical",
> + "set": [
> + "A",
> + "B"
> + ],
> + "then": {
> + "operation": "boost",
> + "score": 5.0
> + },
> + "else": {
> + "operation": "boost",
> + "score": -6.0
> + }
> + }
> +]
> +```
> +### DecisionForestRuntime.h
> +```
> +...
> +namespace ns1 {
> +namespace ns2 {
> +namespace test {
> +class Example {
> +public:
> + void setANumber(float V) { ... }
> + void setAFloat(float V) { ... }
> + void setACategorical(unsigned V) { ... }
> +
> +private:
> + ...
> +};
> +
> +float Evaluate(const Example&);
> +} // namespace test
> +} // namespace ns2
> +} // namespace ns1
> +```
> +
> +### CMake Invocation
> +In order to use the inference runtime, one can use the `gen_decision_forest`
> function
> +described in `CompletionModel.cmake`, which invokes
> `CompletionModelCodegen.py` with the appropriate arguments.
> +
> +For example, the following invocation reads the model present in
> `path/to/model` and creates
> +`${CMAKE_CURRENT_BINARY_DIR}/myfilename.h` and
> `${CMAKE_CURRENT_BINARY_DIR}/myfilename.cpp`
> +describing a `class` named `MyClass` in namespace `fully::qualified`.
> +
> +
> +
> +```
> +gen_decision_forest(path/to/model
> + myfilename
> + ::fully::qualified::MyClass)
> +```
> \ No newline at end of file
>
> diff --git a/clang-tools-extra/clangd/quality/model/features.json
> b/clang-tools-extra/clangd/quality/model/features.json
> new file mode 100644
> index 000000000000..e91eccd1ce20
> --- /dev/null
> +++ b/clang-tools-extra/clangd/quality/model/features.json
> @@ -0,0 +1,8 @@
> +[
> + {
> + "name": "ContextKind",
> + "kind": "ENUM",
> + "type": "clang::CodeCompletionContext::Kind",
> + "header": "clang/Sema/CodeCompleteConsumer.h"
> + }
> +]
> \ No newline at end of file
>
> diff --git a/clang-tools-extra/clangd/quality/model/forest.json
> b/clang-tools-extra/clangd/quality/model/forest.json
> new file mode 100644
> index 000000000000..78a1524e2d81
> --- /dev/null
> +++ b/clang-tools-extra/clangd/quality/model/forest.json
> @@ -0,0 +1,18 @@
> +[
> + {
> + "operation": "if_member",
> + "feature": "ContextKind",
> + "set": [
> + "CCC_DotMemberAccess",
> + "CCC_ArrowMemberAccess"
> + ],
> + "then": {
> + "operation": "boost",
> + "score": 3.0
> + },
> + "else": {
> + "operation": "boost",
> + "score": 1.0
> + }
> + }
> +]
> \ No newline at end of file
>
> diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt
> b/clang-tools-extra/clangd/unittests/CMakeLists.txt
> index 2167b5e210e2..a84fd0b71ca5 100644
> --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
> +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
> @@ -28,6 +28,9 @@ if (CLANGD_ENABLE_REMOTE)
> set(REMOTE_TEST_SOURCES remote/MarshallingTests.cpp)
> endif()
>
> +include(${CMAKE_CURRENT_SOURCE_DIR}/../quality/CompletionModel.cmake)
> +gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/decision_forest_model
> DecisionForestRuntimeTest ::ns1::ns2::test::Example)
> +
> add_custom_target(ClangdUnitTests)
> add_unittest(ClangdUnitTests ClangdTests
> Annotations.cpp
> @@ -44,6 +47,7 @@ add_unittest(ClangdUnitTests ClangdTests
> ConfigCompileTests.cpp
> ConfigProviderTests.cpp
> ConfigYAMLTests.cpp
> + DecisionForestTests.cpp
> DexTests.cpp
> DiagnosticsTests.cpp
> DraftStoreTests.cpp
> @@ -89,6 +93,7 @@ add_unittest(ClangdUnitTests ClangdTests
> TweakTesting.cpp
> URITests.cpp
> XRefsTests.cpp
> + ${CMAKE_CURRENT_BINARY_DIR}/DecisionForestRuntimeTest.cpp
>
> support/CancellationTests.cpp
> support/ContextTests.cpp
> @@ -103,6 +108,11 @@ add_unittest(ClangdUnitTests ClangdTests
> $<TARGET_OBJECTS:obj.clangDaemonTweaks>
> )
>
> +# Include generated CompletionModel headers.
> +target_include_directories(ClangdTests PUBLIC
> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
> +)
> +
> clang_target_link_libraries(ClangdTests
> PRIVATE
> clangAST
>
> diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
> b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
> index 635e036039a0..460976d64f9f 100644
> --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
> +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
> @@ -10,6 +10,7 @@
> #include "ClangdServer.h"
> #include "CodeComplete.h"
> #include "Compiler.h"
> +#include "CompletionModel.h"
> #include "Matchers.h"
> #include "Protocol.h"
> #include "Quality.h"
> @@ -47,6 +48,7 @@ using ::testing::HasSubstr;
> using ::testing::IsEmpty;
> using ::testing::Not;
> using ::testing::UnorderedElementsAre;
> +using ContextKind = CodeCompletionContext::Kind;
>
> // GMock helpers for matching completion items.
> MATCHER_P(Named, Name, "") { return arg.Name == Name; }
> @@ -161,6 +163,16 @@ Symbol withReferences(int N, Symbol S) {
> return S;
> }
>
> +TEST(DecisionForestRuntime, SanityTest) {
> + using Example = clangd::Example;
> + using clangd::Evaluate;
> + Example E1;
> + E1.setContextKind(ContextKind::CCC_ArrowMemberAccess);
> + Example E2;
> + E2.setContextKind(ContextKind::CCC_SymbolOrNewName);
> + EXPECT_GT(Evaluate(E1), Evaluate(E2));
> +}
> +
> TEST(CompletionTest, Limit) {
> clangd::CodeCompleteOptions Opts;
> Opts.Limit = 2;
>
> diff --git a/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
> b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
> new file mode 100644
> index 000000000000..d29c8a4a0358
> --- /dev/null
> +++ b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
> @@ -0,0 +1,29 @@
> +#include "DecisionForestRuntimeTest.h"
> +#include "decision_forest_model/CategoricalFeature.h"
> +#include "gtest/gtest.h"
> +
> +namespace clang {
> +namespace clangd {
> +
> +TEST(DecisionForestRuntime, Evaluate) {
> + using Example = ::ns1::ns2::test::Example;
> + using Cat = ::ns1::ns2::TestEnum;
> + using ::ns1::ns2::test::Evaluate;
> +
> + Example E;
> + E.setANumber(200); // True
> + E.setAFloat(0); // True: +10.0
> + E.setACategorical(Cat::A); // True: +5.0
> + EXPECT_EQ(Evaluate(E), 15.0);
> +
> + E.setANumber(200); // True
> + E.setAFloat(-2.5); // False: -20.0
> + E.setACategorical(Cat::B); // True: +5.0
> + EXPECT_EQ(Evaluate(E), -15.0);
> +
> + E.setANumber(100); // False
> + E.setACategorical(Cat::C); // True: +3.0, False: -6.0
> + EXPECT_EQ(Evaluate(E), -3.0);
> +}
> +} // namespace clangd
> +} // namespace clang
>
> diff --git
> a/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
> b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
> new file mode 100644
> index 000000000000..dfb6ab3b199d
> --- /dev/null
> +++
> b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
> @@ -0,0 +1,5 @@
> +namespace ns1 {
> +namespace ns2 {
> +enum TestEnum { A, B, C, D };
> +} // namespace ns2
> +} // namespace ns1
>
> diff --git
> a/clang-tools-extra/clangd/unittests/decision_forest_model/features.json
> b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json
> new file mode 100644
> index 000000000000..7f159f192e19
> --- /dev/null
> +++
> b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json
> @@ -0,0 +1,16 @@
> +[
> + {
> + "name": "ANumber",
> + "kind": "NUMBER"
> + },
> + {
> + "name": "AFloat",
> + "kind": "NUMBER"
> + },
> + {
> + "name": "ACategorical",
> + "kind": "ENUM",
> + "type": "ns1::ns2::TestEnum",
> + "header": "decision_forest_model/CategoricalFeature.h"
> + }
> +]
> \ No newline at end of file
>
> diff --git
> a/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
> b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
> new file mode 100644
> index 000000000000..26f071da485d
> --- /dev/null
> +++ b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
> @@ -0,0 +1,52 @@
> +[
> + {
> + "operation": "if_greater",
> + "feature": "ANumber",
> + "threshold": 200.0,
> + "then": {
> + "operation": "if_greater",
> + "feature": "AFloat",
> + "threshold": -1,
> + "then": {
> + "operation": "boost",
> + "score": 10.0
> + },
> + "else": {
> + "operation": "boost",
> + "score": -20.0
> + }
> + },
> + "else": {
> + "operation": "if_member",
> + "feature": "ACategorical",
> + "set": [
> + "A",
> + "C"
> + ],
> + "then": {
> + "operation": "boost",
> + "score": 3.0
> + },
> + "else": {
> + "operation": "boost",
> + "score": -4.0
> + }
> + }
> + },
> + {
> + "operation": "if_member",
> + "feature": "ACategorical",
> + "set": [
> + "A",
> + "B"
> + ],
> + "then": {
> + "operation": "boost",
> + "score": 5.0
> + },
> + "else": {
> + "operation": "boost",
> + "score": -6.0
> + }
> + }
> +]
> \ No newline at end of file
>
>
>
> _______________________________________________
> llvm-branch-commits mailing list
> llvm-branch-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-branch-commits/attachments/20200918/3fe0c703/attachment-0001.html>
More information about the llvm-branch-commits
mailing list