[clang-tools-extra] 45698ac - [clangd] Split DecisionForest Evaluate() into one func per tree.
Utkarsh Saxena via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 1 09:12:47 PDT 2020
Author: Utkarsh Saxena
Date: 2020-10-01T18:07:23+02:00
New Revision: 45698ac0052ae5b1c5beb739636396a5b7263966
URL: https://github.com/llvm/llvm-project/commit/45698ac0052ae5b1c5beb739636396a5b7263966
DIFF: https://github.com/llvm/llvm-project/commit/45698ac0052ae5b1c5beb739636396a5b7263966.diff
LOG: [clangd] Split DecisionForest Evaluate() into one func per tree.
This allows MSAN to instrument this function. The previous version was not
instrumentable due to its sheer volume.
Differential Revision: https://reviews.llvm.org/D88536
Added:
Modified:
clang-tools-extra/clangd/quality/CompletionModelCodegen.py
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
index 423e5d14cf52..a1f0cb78037a 100644
--- a/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
+++ b/clang-tools-extra/clangd/quality/CompletionModelCodegen.py
@@ -1,7 +1,7 @@
"""Code generator for Code Completion Model Inference.
Tool runs on the Decision Forest model defined in {model} directory.
-It generates two files: {output_dir}/{filename}.h and {output_dir}/{filename}.cpp
+It generates two files: {output_dir}/{filename}.h and {output_dir}/{filename}.cpp
The generated files defines the Example class named {cpp_class} having all the features as class members.
The generated runtime provides an `Evaluate` function which can be used to score a code completion candidate.
"""
@@ -39,34 +39,32 @@ def header_guard(filename):
def boost_node(n, label, next_label):
- """Returns code snippet for a leaf/boost node.
- Adds value of leaf to the score and jumps to the root of the next tree."""
- return "%s: Score += %s; goto %s;" % (
- label, n['score'], next_label)
+ """Returns code snippet for a leaf/boost node."""
+ return "%s: return %s;" % (label, n['score'])
def if_greater_node(n, label, next_label):
"""Returns code snippet for a if_greater node.
- Jumps to true_label if the Example feature (NUMBER) is greater than the threshold.
- Comparing integers is much faster than comparing floats. Assuming floating points
+ Jumps to true_label if the Example feature (NUMBER) is greater than the threshold.
+ Comparing integers is much faster than comparing floats. Assuming floating points
are represented as IEEE 754, it order-encodes the floats to integers before comparing them.
Control falls through if condition is evaluated to false."""
threshold = n["threshold"]
- return "%s: if (E.%s >= %s /*%s*/) goto %s;" % (
- label, n['feature'], order_encode(threshold), threshold, next_label)
+ return "%s: if (E.get%s() >= %s /*%s*/) goto %s;" % (
+ label, n['feature'], order_encode(threshold), threshold, next_label)
def if_member_node(n, label, next_label):
"""Returns code snippet for a if_member node.
- Jumps to true_label if the Example feature (ENUM) is present in the set of enum values
+ Jumps to true_label if the Example feature (ENUM) is present in the set of enum values
described in the node.
Control falls through if condition is evaluated to false."""
members = '|'.join([
"BIT(%s_type::%s)" % (n['feature'], member)
for member in n["set"]
])
- return "%s: if (E.%s & (%s)) goto %s;" % (
- label, n['feature'], members, next_label)
+ return "%s: if (E.get%s() & (%s)) goto %s;" % (
+ label, n['feature'], members, next_label)
def node(n, label, next_label):
@@ -94,8 +92,6 @@ def tree(t, tree_num, node_num):
"""
label = "t%d_n%d" % (tree_num, node_num)
code = []
- if node_num == 0:
- code.append("t%d:" % tree_num)
if t["operation"] == "boost":
code.append(node(t, label=label, next_label="t%d" % (tree_num + 1)))
@@ -119,13 +115,15 @@ def gen_header_code(features_json, cpp_class, filename):
"""Returns code for header declaring the inference runtime.
Declares the Example class named {cpp_class} inside relevant namespaces.
- The Example class contains all the features as class members. This
+ The Example class contains all the features as class members. This
class can be used to represent a code completion candidate.
Provides `float Evaluate()` function which can be used to score the Example.
"""
setters = []
+ getters = []
for f in features_json:
feature = f["name"]
+
if f["kind"] == "NUMBER":
# Floats are order-encoded to integers for faster comparison.
setters.append(
@@ -138,8 +136,15 @@ class can be used to represent a code completion candidate.
raise ValueError("Unhandled feature type.", f["kind"])
# Class members represent all the features of the Example.
- class_members = ["uint32_t %s = 0;" % f['name'] for f in features_json]
-
+ class_members = [
+ "uint32_t %s = 0;" % f['name']
+ for f in features_json
+ ]
+ getters = [
+ "LLVM_ATTRIBUTE_ALWAYS_INLINE uint32_t get%s() const { return %s; }"
+ % (f['name'], f['name'])
+ for f in features_json
+ ]
nline = "\n "
guard = header_guard(filename)
return """#ifndef %s
@@ -150,6 +155,10 @@ class can be used to represent a code completion candidate.
%s
class %s {
public:
+ // Setters.
+ %s
+
+ // Getters.
%s
private:
@@ -158,18 +167,16 @@ class %s {
// Produces an integer that sorts in the same order as F.
// That is: a < b <==> orderEncode(a) < orderEncode(b).
static uint32_t OrderEncode(float F);
- friend float Evaluate(const %s&);
};
-// The function may have large number of lines of code. MSAN
-// build times out in such case.
-LLVM_NO_SANITIZE("memory")
float Evaluate(const %s&);
%s
#endif // %s
-""" % (guard, guard, cpp_class.ns_begin(), cpp_class.name, nline.join(setters),
- nline.join(class_members), cpp_class.name, cpp_class.name,
- cpp_class.ns_end(), guard)
+""" % (guard, guard, cpp_class.ns_begin(), cpp_class.name,
+ nline.join(setters),
+ nline.join(getters),
+ nline.join(class_members),
+ cpp_class.name, cpp_class.ns_end(), guard)
def order_encode(v):
@@ -182,21 +189,33 @@ def order_encode(v):
def evaluate_func(forest_json, cpp_class):
- """Generates code for `float Evaluate(const {Example}&)` function.
- The generated function can be used to score an Example."""
- code = "float Evaluate(const %s& E) {\n" % cpp_class.name
- lines = []
- lines.append("float Score = 0;")
+ """Generates evaluation functions for each tree and combines them in
+ `float Evaluate(const {Example}&)` function. This function can be
+ used to score an Example."""
+
+ code = ""
+
+ # Generate evaluation function of each tree.
+ code += "namespace {\n"
tree_num = 0
for tree_json in forest_json:
- lines.extend(tree(tree_json, tree_num=tree_num, node_num=0)[0])
- lines.append("")
+ code += "LLVM_ATTRIBUTE_NOINLINE float EvaluateTree%d(const %s& E) {\n" % (tree_num, cpp_class.name)
+ code += " " + \
+ "\n ".join(
+ tree(tree_json, tree_num=tree_num, node_num=0)[0]) + "\n"
+ code += "}\n\n"
tree_num += 1
+ code += "} // namespace\n\n"
+
+ # Combine the scores of all trees in the final function.
+ # MSAN will timeout if these functions are inlined.
+ code += "float Evaluate(const %s& E) {\n" % cpp_class.name
+ code += " float Score = 0;\n"
+ for tree_num in range(len(forest_json)):
+ code += " Score += EvaluateTree%d(E);\n" % tree_num
+ code += " return Score;\n"
+ code += "}\n"
- lines.append("t%s: // No such tree." % len(forest_json))
- lines.append("return Score;")
- code += " " + "\n ".join(lines)
- code += "\n}"
return code
@@ -218,9 +237,9 @@ def gen_cpp_code(forest_json, features_json, filename, cpp_class):
# using-decl for ENUM features.
using_decls = "\n".join("using %s_type = %s;" % (
- feature['name'], feature['type'])
- for feature in features_json
- if feature["kind"] == "ENUM")
+ feature['name'], feature['type'])
+ for feature in features_json
+ if feature["kind"] == "ENUM")
nl = "\n"
return """%s
@@ -287,7 +306,9 @@ def main():
with open(header_file, 'w+t') as output_h:
output_h.write(gen_header_code(
- features_json=features_json, cpp_class=cpp_class, filename=filename))
+ features_json=features_json,
+ cpp_class=cpp_class,
+ filename=filename))
if __name__ == '__main__':
More information about the cfe-commits
mailing list