[clang-tools-extra] 985deba - Revert "Temporarily Revert "[clangd] Add Random Forest runtime for code completion.""

Utkarsh Saxena via cfe-commits cfe-commits at lists.llvm.org
Sat Sep 19 02:20:56 PDT 2020


Author: Utkarsh Saxena
Date: 2020-09-19T10:54:04+02:00
New Revision: 985deba9319be464673c1002767f8a3ec597480d

URL: https://github.com/llvm/llvm-project/commit/985deba9319be464673c1002767f8a3ec597480d
DIFF: https://github.com/llvm/llvm-project/commit/985deba9319be464673c1002767f8a3ec597480d.diff

LOG: Revert "Temporarily Revert "[clangd] Add Random Forest runtime for code completion.""

We intend to replace heuristics based code completion ranking with a Decision Forest Model.

This patch introduces a format for representing the model and an inference runtime that is code-generated at build time.
- Forest.json contains all the trees as an array of trees.
- Features.json describes the features to be used.
- The codegen script takes the above two files and generates CompletionModel, containing the Example class (which holds the features) and the corresponding Evaluate function.
   The Evaluate function maps an Example (the feature values of one completion candidate) to a real number describing the relevance of that candidate.
- The codegen is part of build system and these files are generated at build time.
- Proposes a way to test the generated runtime using a test model.
  - Replicates the model structure in unittests.
  - unittest tests both the test model (for correct tree traversal) and the real model (for sanity).

This reverts commit 549e55b3d5634870aa9d42135f51ad46a6a0e347.

Added: 
    clang-tools-extra/clangd/quality/CompletionModel.cmake
    clang-tools-extra/clangd/quality/CompletionModelCodegen.py
    clang-tools-extra/clangd/quality/README.md
    clang-tools-extra/clangd/quality/model/features.json
    clang-tools-extra/clangd/quality/model/forest.json
    clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
    clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
    clang-tools-extra/clangd/unittests/decision_forest_model/features.json
    clang-tools-extra/clangd/unittests/decision_forest_model/forest.json

Modified: 
    clang-tools-extra/clangd/CMakeLists.txt
    clang-tools-extra/clangd/unittests/CMakeLists.txt
    clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 3a1a034ed17b..9d2ab5be222a 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -28,6 +28,9 @@ set(LLVM_LINK_COMPONENTS
   FrontendOpenMP
   Option
   )
+  
+include(${CMAKE_CURRENT_SOURCE_DIR}/quality/CompletionModel.cmake)
+gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/quality/model CompletionModel clang::clangd::Example)
 
 if(MSVC AND NOT CLANG_CL)
  set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of string constant
@@ -77,6 +80,7 @@ add_clang_library(clangDaemon
   TUScheduler.cpp
   URI.cpp
   XRefs.cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/CompletionModel.cpp
 
   index/Background.cpp
   index/BackgroundIndexLoader.cpp
@@ -117,6 +121,11 @@ add_clang_library(clangDaemon
   omp_gen
   )
 
+# Include generated CompletionModel headers.
+target_include_directories(clangDaemon PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+)
+
 clang_target_link_libraries(clangDaemon
   PRIVATE
   clangAST

diff  --git a/clang-tools-extra/clangd/quality/CompletionModel.cmake b/clang-tools-extra/clangd/quality/CompletionModel.cmake
new file mode 100644
index 000000000000..60c6d2aa8433
--- /dev/null
+++ b/clang-tools-extra/clangd/quality/CompletionModel.cmake
@@ -0,0 +1,37 @@
+# Directory of this file and of CompletionModelCodegen.py, captured at include()
+# time: inside a function CMAKE_CURRENT_LIST_DIR names the caller's list file.
+# Callers must therefore include() this file in (or above) their own scope.
+set(CLANGD_COMPLETION_MODEL_DIR ${CMAKE_CURRENT_LIST_DIR})
+
+# Run the Completion Model code generator on the model (forest.json and
+# features.json) in the ${model} directory. Produces ${filename}.h and
+# ${filename}.cpp in ${CMAKE_CURRENT_BINARY_DIR}. The generated header defines
+# a C++ class called ${cpp_class}, which may be namespace-qualified.
+function(gen_decision_forest model filename cpp_class)
+  set(model_compiler ${CLANGD_COMPLETION_MODEL_DIR}/CompletionModelCodegen.py)
+  set(output_dir ${CMAKE_CURRENT_BINARY_DIR})
+  set(header_file ${output_dir}/${filename}.h)
+  set(cpp_file ${output_dir}/${filename}.cpp)
+
+  add_custom_command(OUTPUT ${header_file} ${cpp_file}
+    COMMAND "${Python3_EXECUTABLE}" ${model_compiler}
+      --model ${model}
+      --output_dir ${output_dir}
+      --filename ${filename}
+      --cpp_class ${cpp_class}
+    COMMENT "Generating code completion model runtime..."
+    DEPENDS ${model_compiler} ${model}/forest.json ${model}/features.json
+    VERBATIM )
+
+  set_source_files_properties(${header_file} ${cpp_file} PROPERTIES
+    GENERATED 1)
+
+  # Disable unused label warning for generated files.
+  if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    set_source_files_properties(${cpp_file} PROPERTIES
+      COMPILE_FLAGS /wd4102)
+  else()
+    set_source_files_properties(${cpp_file} PROPERTIES
+      COMPILE_FLAGS -Wno-unused)
+  endif()
+endfunction()

diff  --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
new file mode 100644
index 000000000000..20bfccd8806f
--- /dev/null
+++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
@@ -0,0 +1,290 @@
+"""Code generator for Code Completion Model Inference.
+
+Tool runs on the Decision Forest model defined in {model} directory.
+It generates two files: {output_dir}/{filename}.h and {output_dir}/{filename}.cpp 
+The generated files define the Example class named {cpp_class}, which has all the features as class members.
+The generated runtime provides an `Evaluate` function which can be used to score a code completion candidate.
+"""
+
+import argparse
+import json
+import struct
+
+
+class CppClass:
+    """Holds class name and names of the enclosing namespaces.
+
+    Attributes:
+        ns: names of the enclosing namespaces, outermost first.
+        name: the unqualified class name.
+    """
+
+    def __init__(self, cpp_class):
+        """Splits a (possibly namespace-qualified) class name on "::".
+
+        Empty namespace components, such as the one produced by a leading
+        "::", are dropped. Raises ValueError if the class name is empty.
+        """
+        ns_and_class = cpp_class.split("::")
+        self.ns = [ns for ns in ns_and_class[0:-1] if len(ns) > 0]
+        self.name = ns_and_class[-1]
+        if len(self.name) == 0:
+            raise ValueError("Empty class name.")
+
+    def ns_begin(self):
+        """Returns snippet for opening namespace declarations.
+
+        One `namespace X {` line per enclosing namespace, outermost first.
+        """
+        open_ns = ["namespace %s {" % ns for ns in self.ns]
+        return "\n".join(open_ns)
+
+    def ns_end(self):
+        """Returns snippet for closing namespace declarations.
+
+        One `} // namespace X` line per enclosing namespace, innermost first.
+        """
+        close_ns = [
+            "} // namespace %s" % ns for ns in reversed(self.ns)]
+        return "\n".join(close_ns)
+
+
+def header_guard(filename):
+    '''Returns the header guard for the generated header.
+
+    The guard embeds the upper-cased filename as is; characters that are not
+    valid in a macro name are not sanitized.
+    '''
+    return "GENERATED_DECISION_FOREST_MODEL_%s_H" % filename.upper()
+
+
+def boost_node(n, label, next_label):
+    """Returns code snippet for a leaf/boost node.
+    Adds value of leaf to the score and jumps to the root of the next tree.
+
+    n is the JSON node (its 'score' is emitted verbatim), label is the label
+    of this node and next_label is the label jumped to afterwards."""
+    return "%s: Score += %s; goto %s;" % (
+            label, n['score'], next_label)
+
+
+def if_greater_node(n, label, next_label):
+    """Returns code snippet for a if_greater node.
+    Jumps to next_label (the label of the true child) if the Example feature
+    (NUMBER) is greater than or equal to the threshold.
+    Comparing integers is much faster than comparing floats. Assuming floating points
+    are represented as IEEE 754, the threshold is order-encoded to an integer here, at
+    code generation time; the original float is kept in a comment next to it.
+    Control falls through if condition is evaluated to false."""
+    threshold = n["threshold"]
+    return "%s: if (E.%s >= %s /*%s*/) goto %s;" % (
+            label, n['feature'], order_encode(threshold), threshold, next_label)
+
+
+def if_member_node(n, label, next_label):
+    """Returns code snippet for a if_member node.
+    Jumps to next_label (the label of the true child) if the Example feature
+    (ENUM) is present in the set of enum values described in the node.
+    The feature holds a single bit; it is tested against the OR of the bits of
+    all members of the set, each spelled as `<feature>_type::<member>`.
+    Control falls through if condition is evaluated to false."""
+    members = '|'.join([
+        "BIT(%s_type::%s)" % (n['feature'], member)
+        for member in n["set"]
+    ])
+    return "%s: if (E.%s & (%s)) goto %s;" % (
+            label, n['feature'], members, next_label)
+
+
+def node(n, label, next_label):
+    """Returns code snippet for the node.
+
+    Dispatches on n['operation'] ('boost', 'if_greater' or 'if_member');
+    any other operation raises KeyError."""
+    return {
+        'boost': boost_node,
+        'if_greater': if_greater_node,
+        'if_member': if_member_node,
+    }[n['operation']](n, label, next_label)
+
+
+def tree(t, tree_num, node_num):
+    """Returns code for inferencing a Decision Tree.
+    Also returns the size of the decision tree.
+
+    A tree starts with its label `t{tree#}`.
+    A node of the tree starts with label `t{tree#}_n{node#}`.
+
+    The tree contains two types of node: Conditional node and Leaf node.
+    -   Conditional node evaluates a condition. If true, it jumps to the true node/child.
+        Code is generated using pre-order traversal of the tree considering
+        false node as the first child. Therefore the false node is always the
+        immediately next label.
+    -   Leaf node adds the value to the score and jumps to the next tree.
+
+    Args:
+        t: JSON node at the root of the (sub)tree.
+        tree_num: index of the tree within the forest.
+        node_num: pre-order index of `t` within its tree.
+
+    Returns:
+        A pair (code, size): the list of generated code lines and the number
+        of nodes in the (sub)tree rooted at `t`.
+    """
+    label = "t%d_n%d" % (tree_num, node_num)
+    code = []
+    # Only the root of a tree carries the tree label.
+    if node_num == 0:
+        code.append("t%d:" % tree_num)
+
+    if t["operation"] == "boost":
+        code.append(node(t, label=label, next_label="t%d" % (tree_num + 1)))
+        return code, 1
+
+    false_code, false_size = tree(
+        t['else'], tree_num=tree_num, node_num=node_num+1)
+
+    # The true subtree is numbered after this node and the whole false subtree.
+    true_node_num = node_num+false_size+1
+    true_label = "t%d_n%d" % (tree_num, true_node_num)
+
+    true_code, true_size = tree(
+        t['then'], tree_num=tree_num, node_num=true_node_num)
+
+    code.append(node(t, label=label, next_label=true_label))
+
+    return code+false_code+true_code, 1+false_size+true_size
+
+
+def gen_header_code(features_json, cpp_class, filename):
+    """Returns code for header declaring the inference runtime.
+
+    Declares the Example class named {cpp_class} inside relevant namespaces.
+    The Example class contains all the features as class members. This
+    class can be used to represent a code completion candidate.
+    Provides `float Evaluate()` function which can be used to score the Example.
+
+    Args:
+        features_json: list of feature descriptions; each has a "name" and a
+            "kind" that is either "NUMBER" or "ENUM".
+        cpp_class: CppClass naming the generated class and its namespaces.
+        filename: base name of the generated files, used for the header guard.
+
+    Raises:
+        ValueError: if a feature has a kind other than NUMBER or ENUM.
+    """
+    setters = []
+    for f in features_json:
+        feature = f["name"]
+        if f["kind"] == "NUMBER":
+            # Floats are order-encoded to integers for faster comparison.
+            setters.append(
+                "void set%s(float V) { %s = OrderEncode(V); }" % (
+                    feature, feature))
+        elif f["kind"] == "ENUM":
+            # Enum values are stored as a single set bit, to be tested
+            # against the bit mask emitted for if_member nodes.
+            setters.append(
+                "void set%s(unsigned V) { %s = 1 << V; }" % (feature, feature))
+        else:
+            raise ValueError("Unhandled feature type.", f["kind"])
+
+    # Class members represent all the features of the Example.
+    class_members = ["uint32_t %s = 0;" % f['name'] for f in features_json]
+
+    nline = "\n  "
+    guard = header_guard(filename)
+    return """#ifndef %s
+#define %s
+#include <cstdint>
+
+%s
+class %s {
+public:
+  %s
+
+private:
+  %s
+
+  // Produces an integer that sorts in the same order as F.
+  // That is: a < b <==> orderEncode(a) < orderEncode(b).
+  static uint32_t OrderEncode(float F);
+  friend float Evaluate(const %s&);
+};
+
+float Evaluate(const %s&);
+%s
+#endif // %s
+""" % (guard, guard, cpp_class.ns_begin(), cpp_class.name, nline.join(setters),
+        nline.join(class_members), cpp_class.name, cpp_class.name,
+        cpp_class.ns_end(), guard)
+
+
+def order_encode(v):
+    """Returns an integer that sorts in the same order as the float v.
+
+    v is first converted to its IEEE 754 single-precision bit pattern.
+    Used at code generation time to encode if_greater thresholds."""
+    i = struct.unpack('<I', struct.pack('<f', v))[0]
+    TopBit = 1 << 31
+    # IEEE 754 floats compare like sign-magnitude integers.
+    if (i & TopBit):  # Negative float
+        return (1 << 32) - i  # low half of integers, order reversed.
+    return TopBit + i  # top half of integers
+
+
+def evaluate_func(forest_json, cpp_class):
+    """Generates code for `float Evaluate(const {Example}&)` function.
+    The generated function can be used to score an Example.
+
+    The code of all trees is emitted back to back; each leaf jumps to the
+    label of the next tree, and a final label one past the last tree
+    returns the accumulated score."""
+    code = "float Evaluate(const %s& E) {\n" % cpp_class.name
+    lines = []
+    lines.append("float Score = 0;")
+    tree_num = 0
+    for tree_json in forest_json:
+        lines.extend(tree(tree_json, tree_num=tree_num, node_num=0)[0])
+        lines.append("")
+        tree_num += 1
+
+    # Jump target for the leaves of the last tree.
+    lines.append("t%s: // No such tree." % len(forest_json))
+    lines.append("return Score;")
+    code += "  " + "\n  ".join(lines)
+    code += "\n}"
+    return code
+
+
+def gen_cpp_code(forest_json, features_json, filename, cpp_class):
+    """Generates code for the .cpp file."""
+    # Headers
+    # Required by OrderEncode(float F).
+    angled_include = [
+        '#include <%s>' % h
+        for h in ["cstring", "limits"]
+    ]
+
+    # Include generated header.
+    qouted_headers = {filename + '.h', 'llvm/ADT/bit.h'}
+    # Headers required by ENUM features used by the model.
+    qouted_headers |= {f["header"]
+                       for f in features_json if f["kind"] == "ENUM"}
+    quoted_include = ['#include "%s"' % h for h in sorted(qouted_headers)]
+
+    # using-decl for ENUM features.
+    using_decls = "\n".join("using %s_type = %s;" % (
+                                feature['name'], feature['type'])
+                            for feature in features_json
+                            if feature["kind"] == "ENUM")
+    nl = "\n"
+    return """%s
+
+%s
+
+#define BIT(X) (1 << X)
+
+%s
+
+%s
+
+uint32_t %s::OrderEncode(float F) {
+  static_assert(std::numeric_limits<float>::is_iec559, "");
+  constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
+
+  // Get the bits of the float. Endianness is the same as for integers.
+  uint32_t U = llvm::bit_cast<uint32_t>(F);
+  std::memcpy(&U, &F, sizeof(U));
+  // IEEE 754 floats compare like sign-magnitude integers.
+  if (U & TopBit)    // Negative float.
+    return 0 - U;    // Map onto the low half of integers, order reversed.
+  return U + TopBit; // Positive floats map onto the high half of integers.
+}
+
+%s
+%s
+""" % (nl.join(angled_include), nl.join(quoted_include), cpp_class.ns_begin(),
+       using_decls, cpp_class.name, evaluate_func(forest_json, cpp_class),
+       cpp_class.ns_end())
+
+
+def main():
+    parser = argparse.ArgumentParser('DecisionForestCodegen')
+    parser.add_argument('--filename', help='output file name.')
+    parser.add_argument('--output_dir', help='output directory.')
+    parser.add_argument('--model', help='path to model directory.')
+    parser.add_argument(
+        '--cpp_class',
+        help='The name of the class (which may be a namespace-qualified) created in generated header.'
+    )
+    ns = parser.parse_args()
+
+    output_dir = ns.output_dir
+    filename = ns.filename
+    header_file = "%s/%s.h" % (output_dir, filename)
+    cpp_file = "%s/%s.cpp" % (output_dir, filename)
+    cpp_class = CppClass(cpp_class=ns.cpp_class)
+
+    model_file = "%s/forest.json" % ns.model
+    features_file = "%s/features.json" % ns.model
+
+    with open(features_file) as f:
+        features_json = json.load(f)
+
+    with open(model_file) as m:
+        forest_json = json.load(m)
+
+    with open(cpp_file, 'w+t') as output_cc:
+        output_cc.write(
+            gen_cpp_code(forest_json=forest_json,
+                         features_json=features_json,
+                         filename=filename,
+                         cpp_class=cpp_class))
+
+    with open(header_file, 'w+t') as output_h:
+        output_h.write(gen_header_code(
+            features_json=features_json, cpp_class=cpp_class, filename=filename))
+
+
+if __name__ == '__main__':
+    main()

diff  --git a/clang-tools-extra/clangd/quality/README.md b/clang-tools-extra/clangd/quality/README.md
new file mode 100644
index 000000000000..36fa37320e54
--- /dev/null
+++ b/clang-tools-extra/clangd/quality/README.md
@@ -0,0 +1,220 @@
+# Decision Forest Code Completion Model
+
+## Decision Forest
+A **decision forest** is a collection of many decision trees. A **decision tree** is a full binary tree that provides a quality prediction for an input (code completion item). Internal nodes represent a **binary decision** based on the input data, and leaf nodes represent a prediction.
+
+In order to predict the relevance of a code completion item, we traverse each of the decision trees beginning with their roots until we reach a leaf. 
+
+An input (code completion candidate) is characterized as a set of **features**, such as the *type of symbol* or the *number of existing references*.
+
+At every non-leaf node, we evaluate the condition to decide whether to go left or right. The condition compares one **feature** of the input against a constant. The condition can be of two types:
+- **if_greater**: Checks whether a numerical feature is **>=** a **threshold**.
+- **if_member**: Check whether the **enum** feature is contained in the **set** defined in the node.
+
+A leaf node contains the value **score**.
+To compute an overall **quality** score, we traverse each tree in this way and add up the scores.
+
+## Model Input Format
+The input model is represented in json format.
+
+### Features
+The file **features.json** defines the features available to the model. 
+It is a json list of features. The features can be of following two kinds.
+
+#### Number
+```
+{
+  "name": "a_numerical_feature",
+  "kind": "NUMBER"
+}
+```
+#### Enum
+```
+{
+  "name": "an_enum_feature",
+  "kind": "ENUM",
+  "type": "fully::qualified::enum",
+  "header": "path/to/HeaderDeclaringEnum.h"
+}
+```
+The field `type` specifies the fully qualified name of the enum.
+The maximum cardinality of the enum can be **32**.
+
+The field `header` specifies the header containing the declaration of the enum.
+This header is included by the inference runtime.
+
+
+### Decision Forest
+The file `forest.json` defines the  decision forest. It is a json list of **DecisionTree**.
+
+**DecisionTree** is one of **IfGreaterNode**, **IfMemberNode**, **LeafNode**.
+#### IfGreaterNode
+```
+{
+  "operation": "if_greater",
+  "feature": "a_numerical_feature",
+  "threshold": A real number,
+  "then": {A DecisionTree},
+  "else": {A DecisionTree}
+}
+```
+#### IfMemberNode
+```
+{
+  "operation": "if_member",
+  "feature": "an_enum_feature",
+  "set": ["enum_value1", "enum_value2", ...],
+  "then": {A DecisionTree},
+  "else": {A DecisionTree}
+}
+```
+#### LeafNode
+```
+{
+  "operation": "boost",
+  "score": A real number
+}
+```
+
+## Code Generator for Inference
+The implementation of inference runtime is split across:
+
+### Code generator
+The code generator `CompletionModelCodegen.py` takes the `${model}` directory as input and generates the inference library:
+- `${output_dir}/{filename}.h`
+- `${output_dir}/{filename}.cpp`
+
+Invocation
+```
+python3 CompletionModelCodegen.py \
+        --model path/to/model/dir \
+        --output_dir path/to/output/dir \
+        --filename OutputFileName \
+        --cpp_class clang::clangd::YourExampleClass
+```
+### Build System
+`CompletionModel.cmake` provides the `gen_decision_forest` function.
+A client intending to use the CompletionModel for inference can call it to trigger the code generator and generate the inference library.
+The client can then use the generated API by including and depending on this library.
+
+### Generated API for inference
+The code generator defines the Example `class` inside relevant namespaces as specified in option `${cpp_class}`.
+
+The members of this generated class comprise all the features mentioned in `features.json`.
+Thus this class can represent a code completion candidate that needs to be scored.
+
+The API also provides `float Evaluate(const MyClass&)` which can be used to score the completion candidate.
+
+
+## Example
+### model/features.json
+```
+[
+  {
+    "name": "ANumber",
+    "kind": "NUMBER"
+  },
+  {
+    "name": "AFloat",
+    "kind": "NUMBER"
+  },
+  {
+    "name": "ACategorical",
+    "kind": "ENUM",
+    "type": "ns1::ns2::TestEnum",
+    "header": "model/CategoricalFeature.h"
+  }
+]
+```
+### model/forest.json
+```
+[
+  {
+    "operation": "if_greater",
+    "feature": "ANumber",
+    "threshold": 200.0,
+    "then": {
+      "operation": "if_greater",
+      "feature": "AFloat",
+      "threshold": -1,
+      "then": {
+        "operation": "boost",
+        "score": 10.0
+      },
+      "else": {
+        "operation": "boost",
+        "score": -20.0
+      }
+    },
+    "else": {
+      "operation": "if_member",
+      "feature": "ACategorical",
+      "set": [
+        "A",
+        "C"
+      ],
+      "then": {
+        "operation": "boost",
+        "score": 3.0
+      },
+      "else": {
+        "operation": "boost",
+        "score": -4.0
+      }
+    }
+  },
+  {
+    "operation": "if_member",
+    "feature": "ACategorical",
+    "set": [
+      "A",
+      "B"
+    ],
+    "then": {
+      "operation": "boost",
+      "score": 5.0
+    },
+    "else": {
+      "operation": "boost",
+      "score": -6.0
+    }
+  }
+]
+```
+### DecisionForestRuntime.h
+```
+...
+namespace ns1 {
+namespace ns2 {
+namespace test {
+class Example {
+public:
+  void setANumber(float V) { ... }
+  void setAFloat(float V) { ... }
+  void setACategorical(unsigned V) { ... }
+
+private:
+  ...
+};
+
+float Evaluate(const Example&);
+} // namespace test
+} // namespace ns2
+} // namespace ns1
+```
+
+### CMake Invocation
+In order to use the inference runtime, one can use the `gen_decision_forest` function
+described in `CompletionModel.cmake`, which invokes `CompletionModelCodegen.py` with the appropriate arguments.
+
+For example, the following invocation reads the model present in `path/to/model` and creates 
+`${CMAKE_CURRENT_BINARY_DIR}/myfilename.h` and `${CMAKE_CURRENT_BINARY_DIR}/myfilename.cpp` 
+describing a `class` named `MyClass` in namespace `fully::qualified`.
+
+
+
+```
+gen_decision_forest(path/to/model
+  myfilename
+  ::fully::qualified::MyClass)
+```
\ No newline at end of file

diff  --git a/clang-tools-extra/clangd/quality/model/features.json b/clang-tools-extra/clangd/quality/model/features.json
new file mode 100644
index 000000000000..e91eccd1ce20
--- /dev/null
+++ b/clang-tools-extra/clangd/quality/model/features.json
@@ -0,0 +1,8 @@
+[
+    {
+        "name": "ContextKind",
+        "kind": "ENUM",
+        "type": "clang::CodeCompletionContext::Kind",
+        "header": "clang/Sema/CodeCompleteConsumer.h"
+    }
+]
\ No newline at end of file

diff  --git a/clang-tools-extra/clangd/quality/model/forest.json b/clang-tools-extra/clangd/quality/model/forest.json
new file mode 100644
index 000000000000..78a1524e2d81
--- /dev/null
+++ b/clang-tools-extra/clangd/quality/model/forest.json
@@ -0,0 +1,18 @@
+[
+    {
+        "operation": "if_member",
+        "feature": "ContextKind",
+        "set": [
+            "CCC_DotMemberAccess",
+            "CCC_ArrowMemberAccess"
+        ],
+        "then": {
+            "operation": "boost",
+            "score": 3.0
+        },
+        "else": {
+            "operation": "boost",
+            "score": 1.0
+        }
+    }
+]
\ No newline at end of file

diff  --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
index 2167b5e210e2..a84fd0b71ca5 100644
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -28,6 +28,9 @@ if (CLANGD_ENABLE_REMOTE)
   set(REMOTE_TEST_SOURCES remote/MarshallingTests.cpp)
 endif()
 
+include(${CMAKE_CURRENT_SOURCE_DIR}/../quality/CompletionModel.cmake)
+gen_decision_forest(${CMAKE_CURRENT_SOURCE_DIR}/decision_forest_model DecisionForestRuntimeTest ::ns1::ns2::test::Example)
+
 add_custom_target(ClangdUnitTests)
 add_unittest(ClangdUnitTests ClangdTests
   Annotations.cpp
@@ -44,6 +47,7 @@ add_unittest(ClangdUnitTests ClangdTests
   ConfigCompileTests.cpp
   ConfigProviderTests.cpp
   ConfigYAMLTests.cpp
+  DecisionForestTests.cpp
   DexTests.cpp
   DiagnosticsTests.cpp
   DraftStoreTests.cpp
@@ -89,6 +93,7 @@ add_unittest(ClangdUnitTests ClangdTests
   TweakTesting.cpp
   URITests.cpp
   XRefsTests.cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/DecisionForestRuntimeTest.cpp
 
   support/CancellationTests.cpp
   support/ContextTests.cpp
@@ -103,6 +108,11 @@ add_unittest(ClangdUnitTests ClangdTests
   $<TARGET_OBJECTS:obj.clangDaemonTweaks>
   )
 
+# Include generated CompletionModel headers. ClangdTests is an executable, so
+target_include_directories(ClangdTests PRIVATE
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+) # nothing consumes its usage requirements and PRIVATE is sufficient.
+
 clang_target_link_libraries(ClangdTests
   PRIVATE
   clangAST

diff  --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index 635e036039a0..460976d64f9f 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -10,6 +10,7 @@
 #include "ClangdServer.h"
 #include "CodeComplete.h"
 #include "Compiler.h"
+#include "CompletionModel.h"
 #include "Matchers.h"
 #include "Protocol.h"
 #include "Quality.h"
@@ -47,6 +48,7 @@ using ::testing::HasSubstr;
 using ::testing::IsEmpty;
 using ::testing::Not;
 using ::testing::UnorderedElementsAre;
+using ContextKind = CodeCompletionContext::Kind;
 
 // GMock helpers for matching completion items.
 MATCHER_P(Named, Name, "") { return arg.Name == Name; }
@@ -161,6 +163,16 @@ Symbol withReferences(int N, Symbol S) {
   return S;
 }
 
+// Sanity check of the real completion model: an Example in a member-access
+// context must score strictly higher than one in an unrelated context.
+TEST(DecisionForestRuntime, SanityTest) {
+  using Example = clangd::Example;
+  using clangd::Evaluate;
+  Example E1;
+  E1.setContextKind(ContextKind::CCC_ArrowMemberAccess);
+  Example E2;
+  E2.setContextKind(ContextKind::CCC_SymbolOrNewName);
+  EXPECT_GT(Evaluate(E1), Evaluate(E2));
+}
+
 TEST(CompletionTest, Limit) {
   clangd::CodeCompleteOptions Opts;
   Opts.Limit = 2;

diff  --git a/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
new file mode 100644
index 000000000000..d29c8a4a0358
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/DecisionForestTests.cpp
@@ -0,0 +1,29 @@
+#include "DecisionForestRuntimeTest.h"
+#include "decision_forest_model/CategoricalFeature.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+// Checks tree traversal on the test model in decision_forest_model/: each
+// case sets the features and expects the sum of the leaf scores of both trees.
+// The trailing comments give the outcome of the condition that reads the
+// feature and, where a leaf is reached, its score.
+TEST(DecisionForestRuntime, Evaluate) {
+  using Example = ::ns1::ns2::test::Example;
+  using Cat = ::ns1::ns2::TestEnum;
+  using ::ns1::ns2::test::Evaluate;
+
+  Example E;
+  E.setANumber(200);         // True
+  E.setAFloat(0);            // True: +10.0
+  E.setACategorical(Cat::A); // True: +5.0
+  EXPECT_EQ(Evaluate(E), 15.0);
+
+  E.setANumber(200);         // True
+  E.setAFloat(-2.5);         // False: -20.0
+  E.setACategorical(Cat::B); // True: +5.0
+  EXPECT_EQ(Evaluate(E), -15.0);
+
+  // AFloat is not consulted when the ANumber condition is false.
+  E.setANumber(100);         // False
+  E.setACategorical(Cat::C); // True: +3.0, False: -6.0
+  EXPECT_EQ(Evaluate(E), -3.0);
+}
+} // namespace clangd
+} // namespace clang

diff  --git a/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
new file mode 100644
index 000000000000..dfb6ab3b199d
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h
@@ -0,0 +1,5 @@
+namespace ns1 {
+namespace ns2 {
+enum TestEnum { A, B, C, D };
+} // namespace ns2
+} // namespace ns1

diff  --git a/clang-tools-extra/clangd/unittests/decision_forest_model/features.json b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json
new file mode 100644
index 000000000000..7f159f192e19
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/decision_forest_model/features.json
@@ -0,0 +1,16 @@
+[
+    {
+        "name": "ANumber",
+        "kind": "NUMBER"
+    },
+    {
+        "name": "AFloat",
+        "kind": "NUMBER"
+    },
+    {
+        "name": "ACategorical",
+        "kind": "ENUM",
+        "type": "ns1::ns2::TestEnum",
+        "header": "decision_forest_model/CategoricalFeature.h"
+    }
+]
\ No newline at end of file

diff  --git a/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
new file mode 100644
index 000000000000..26f071da485d
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/decision_forest_model/forest.json
@@ -0,0 +1,52 @@
+[
+    {
+        "operation": "if_greater",
+        "feature": "ANumber",
+        "threshold": 200.0,
+        "then": {
+            "operation": "if_greater",
+            "feature": "AFloat",
+            "threshold": -1,
+            "then": {
+                "operation": "boost",
+                "score": 10.0
+            },
+            "else": {
+                "operation": "boost",
+                "score": -20.0
+            }
+        },
+        "else": {
+            "operation": "if_member",
+            "feature": "ACategorical",
+            "set": [
+                "A",
+                "C"
+            ],
+            "then": {
+                "operation": "boost",
+                "score": 3.0
+            },
+            "else": {
+                "operation": "boost",
+                "score": -4.0
+            }
+        }
+    },
+    {
+        "operation": "if_member",
+        "feature": "ACategorical",
+        "set": [
+            "A",
+            "B"
+        ],
+        "then": {
+            "operation": "boost",
+            "score": 5.0
+        },
+        "else": {
+            "operation": "boost",
+            "score": -6.0
+        }
+    }
+]
\ No newline at end of file


        


More information about the cfe-commits mailing list