[clang-tools-extra] e9fb150 - [clangd] Config: Fragments and parsing from YAML

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 25 14:02:40 PDT 2020


Author: Sam McCall
Date: 2020-06-25T22:55:45+02:00
New Revision: e9fb1506b83d001082dc535af12acf45050a527c

URL: https://github.com/llvm/llvm-project/commit/e9fb1506b83d001082dc535af12acf45050a527c
DIFF: https://github.com/llvm/llvm-project/commit/e9fb1506b83d001082dc535af12acf45050a527c.diff

LOG: [clangd] Config: Fragments and parsing from YAML

Summary:
This is a piece from the design in https://tinyurl.com/clangd-config
https://reviews.llvm.org/D82335 is a draft with all the bits linked together.
It doesn't do anything yet, it's just a library + tests.

It deliberately implements a minimal set of actual configuration options.

Reviewers: kadircet, adamcz

Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D82386

Added: 
    clang-tools-extra/clangd/ConfigFragment.h
    clang-tools-extra/clangd/ConfigYAML.cpp
    clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp

Modified: 
    clang-tools-extra/clangd/CMakeLists.txt
    clang-tools-extra/clangd/unittests/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 3ff609a86da9..de3dd81a81db 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -36,6 +36,7 @@ add_clang_library(clangDaemon
   CollectMacros.cpp
   CompileCommands.cpp
   Compiler.cpp
+  ConfigYAML.cpp
   Diagnostics.cpp
   DraftStore.cpp
   ExpectedTypes.cpp

diff  --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h
new file mode 100644
index 000000000000..2cb646757e17
--- /dev/null
+++ b/clang-tools-extra/clangd/ConfigFragment.h
@@ -0,0 +1,118 @@
+//===--- ConfigFragment.h - Unit of user-specified configuration -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Various clangd features have configurable behaviour (or can be disabled).
+// The configuration system allows users to control this:
+//  - in a user config file, a project config file, via LSP, or via flags
+//  - specifying 
diff erent settings for 
diff erent files
+//
+// This file defines the config::Fragment structure which models one piece of
+// configuration as obtained from a source like a file.
+//
+// This is distinct from how the config is interpreted (CompiledFragment),
+// combined (Provider) and exposed to the rest of clangd (Config).
+//
+//===----------------------------------------------------------------------===//
+//
+// To add a new configuration option, you must:
+//  - add its syntactic form to Fragment
+//  - update ConfigYAML.cpp to parse it
+//  - add its semantic form to Config (in Config.h)
+//  - update ConfigCompile.cpp to map Fragment -> Config
+//  - make use of the option inside clangd
+//  - document the new option (config.md in the llvm/clangd-www repository)
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_CONFIGFRAGMENT_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_CONFIGFRAGMENT_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace config {
+
+/// An entity written in config along, with its optional location in the file.
+template <typename T> struct Located {
+  Located(T Value, llvm::SMRange Range = {})
+      : Range(Range), Value(std::move(Value)) {}
+
+  llvm::SMRange Range;
+  T &operator->() { return Value; }
+  const T &operator->() const { return Value; }
+  T &operator*() { return Value; }
+  const T &operator*() const { return Value; }
+
+private:
+  T Value;
+};
+
+/// Used to report problems in parsing or interpreting a config.
+/// Errors reflect structurally invalid config that should be user-visible.
+/// Warnings reflect e.g. unknown properties that are recoverable.
+using DiagnosticCallback = llvm::function_ref<void(const llvm::SMDiagnostic &)>;
+
+/// A chunk of configuration obtained from a config file, LSP, or elsewhere.
+struct Fragment {
+  /// Parses fragments from a YAML file (one from each --- delimited document).
+  /// Documents that contained fatal errors are omitted from the results.
+  /// BufferName is used for the SourceMgr and diagnostics.
+  static std::vector<Fragment> parseYAML(llvm::StringRef YAML,
+                                         llvm::StringRef BufferName,
+                                         DiagnosticCallback);
+
+  /// These fields are not part of the user-specified configuration, but
+  /// instead are populated by the parser to describe the configuration source.
+  struct SourceInfo {
+    /// Retains a buffer of the original source this fragment was parsed from.
+    /// Locations within Located<T> objects point into this SourceMgr.
+    /// Shared because multiple fragments are often parsed from one (YAML) file.
+    /// May be null, then all locations should be ignored.
+    std::shared_ptr<llvm::SourceMgr> Manager;
+    /// The start of the original source for this fragment.
+    /// Only valid if SourceManager is set.
+    llvm::SMLoc Location;
+  };
+  SourceInfo Source;
+
+  /// Conditions restrict when a Fragment applies.
+  ///
+  /// Each separate condition must match (combined with AND).
+  /// When one condition has multiple values, any may match (combined with OR).
+  ///
+  /// Conditions based on a file's path use the following form:
+  /// - if the fragment came from a project directory, the path is relative
+  /// - if the fragment is global (e.g. user config), the path is absolute
+  /// - paths always use forward-slashes (UNIX-style)
+  /// If no file is being processed, these conditions will not match.
+  struct ConditionBlock {
+    /// The file being processed must fully match a regular expression.
+    std::vector<Located<std::string>> PathMatch;
+    /// An unrecognized key was found while parsing the condition.
+    /// The condition will evaluate to false.
+    bool HasUnrecognizedCondition = false;
+  };
+  ConditionBlock Condition;
+
+  struct CompileFlagsBlock {
+    std::vector<Located<std::string>> Add;
+  } CompileFlags;
+};
+
+} // namespace config
+} // namespace clangd
+} // namespace clang
+
+#endif

diff  --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp
new file mode 100644
index 000000000000..ee3331df8694
--- /dev/null
+++ b/clang-tools-extra/clangd/ConfigYAML.cpp
@@ -0,0 +1,213 @@
+//===--- ConfigYAML.cpp - Loading configuration fragments from YAML files -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConfigFragment.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include <system_error>
+
+namespace clang {
+namespace clangd {
+namespace config {
+namespace {
+using llvm::yaml::BlockScalarNode;
+using llvm::yaml::MappingNode;
+using llvm::yaml::Node;
+using llvm::yaml::ScalarNode;
+using llvm::yaml::SequenceNode;
+
+class Parser {
+  llvm::SourceMgr &SM;
+
+public:
+  Parser(llvm::SourceMgr &SM) : SM(SM) {}
+
+  // Tries to parse N into F, returning false if it failed and we couldn't
+  // meaningfully recover (e.g. YAML syntax error broke the stream).
+  // The private parse() helpers follow the same pattern.
+  bool parse(Fragment &F, Node &N) {
+    DictParser Dict("Config", this);
+    Dict.handle("If", [&](Node &N) { return parse(F.Condition, N); });
+    Dict.handle("CompileFlags",
+                [&](Node &N) { return parse(F.CompileFlags, N); });
+    return Dict.parse(N);
+  }
+
+private:
+  bool parse(Fragment::ConditionBlock &F, Node &N) {
+    DictParser Dict("Condition", this);
+    Dict.unrecognized(
+        [&](llvm::StringRef) { F.HasUnrecognizedCondition = true; });
+    Dict.handle("PathMatch", [&](Node &N) {
+      if (auto Values = scalarValues(N))
+        F.PathMatch = std::move(*Values);
+      return !N.failed();
+    });
+    return Dict.parse(N);
+  }
+
+  bool parse(Fragment::CompileFlagsBlock &F, Node &N) {
+    DictParser Dict("CompileFlags", this);
+    Dict.handle("Add", [&](Node &N) {
+      if (auto Values = scalarValues(N))
+        F.Add = std::move(*Values);
+      return !N.failed();
+    });
+    return Dict.parse(N);
+  }
+
+  // Helper for parsing mapping nodes (dictionaries).
+  // We don't use YamlIO as we want to control over unknown keys.
+  class DictParser {
+    llvm::StringRef Description;
+    std::vector<std::pair<llvm::StringRef, std::function<bool(Node &)>>> Keys;
+    std::function<void(llvm::StringRef)> Unknown;
+    Parser *Outer;
+
+  public:
+    DictParser(llvm::StringRef Description, Parser *Outer)
+        : Description(Description), Outer(Outer) {}
+
+    // Parse is called when Key is encountered, and passed the associated value.
+    // It should emit diagnostics if the value is invalid (e.g. wrong type).
+    // If Key is seen twice, Parse runs only once and an error is reported.
+    void handle(llvm::StringLiteral Key, std::function<bool(Node &)> Parse) {
+      for (const auto &Entry : Keys)
+        assert(Entry.first != Key && "duplicate key handler");
+      Keys.emplace_back(Key, std::move(Parse));
+    }
+
+    // Fallback is called when a Key is not matched by any handle().
+    // A warning is also automatically emitted.
+    void unrecognized(std::function<void(llvm::StringRef)> Fallback) {
+      Unknown = std::move(Fallback);
+    }
+
+    // Process a mapping node and call handlers for each key/value pair.
+    bool parse(Node &N) const {
+      if (N.getType() != Node::NK_Mapping) {
+        Outer->error(Description + " should be a dictionary", N);
+        return false;
+      }
+      llvm::SmallSet<std::string, 8> Seen;
+      for (auto &KV : llvm::cast<MappingNode>(N)) {
+        auto *K = KV.getKey();
+        if (!K) // YAMLParser emitted an error.
+          return false;
+        auto Key = Outer->scalarValue(*K, "Dictionary key");
+        if (!Key)
+          continue;
+        if (!Seen.insert(**Key).second) {
+          Outer->warning("Duplicate key " + **Key + " is ignored", *K);
+          continue;
+        }
+        auto *Value = KV.getValue();
+        if (!Value) // YAMLParser emitted an error.
+          return false;
+        bool Matched = false;
+        for (const auto &Handler : Keys) {
+          if (Handler.first == **Key) {
+            if (!Handler.second(*Value))
+              return false;
+            Matched = true;
+            break;
+          }
+        }
+        if (!Matched) {
+          Outer->warning("Unknown " + Description + " key " + **Key, *K);
+          if (Unknown)
+            Unknown(**Key);
+        }
+      }
+      return true;
+    }
+  };
+
+  // Try to parse a single scalar value from the node, warn on failure.
+  llvm::Optional<Located<std::string>> scalarValue(Node &N,
+                                                   llvm::StringRef Desc) {
+    llvm::SmallString<256> Buf;
+    if (auto *S = llvm::dyn_cast<ScalarNode>(&N))
+      return Located<std::string>(S->getValue(Buf).str(), N.getSourceRange());
+    if (auto *BS = llvm::dyn_cast<BlockScalarNode>(&N))
+      return Located<std::string>(BS->getValue().str(), N.getSourceRange());
+    warning(Desc + " should be scalar", N);
+    return llvm::None;
+  }
+
+  // Try to parse a list of single scalar values, or just a single value.
+  llvm::Optional<std::vector<Located<std::string>>> scalarValues(Node &N) {
+    std::vector<Located<std::string>> Result;
+    if (auto *S = llvm::dyn_cast<ScalarNode>(&N)) {
+      llvm::SmallString<256> Buf;
+      Result.emplace_back(S->getValue(Buf).str(), N.getSourceRange());
+    } else if (auto *S = llvm::dyn_cast<BlockScalarNode>(&N)) {
+      Result.emplace_back(S->getValue().str(), N.getSourceRange());
+    } else if (auto *S = llvm::dyn_cast<SequenceNode>(&N)) {
+      for (auto &Child : *S) {
+        if (auto Value = scalarValue(Child, "List item"))
+          Result.push_back(std::move(*Value));
+      }
+    } else {
+      warning("Expected scalar or list of scalars", N);
+      return llvm::None;
+    }
+    return Result;
+  }
+
+  // Report a "hard" error, reflecting a config file that can never be valid.
+  void error(const llvm::Twine &Msg, const Node &N) {
+    SM.PrintMessage(N.getSourceRange().Start, llvm::SourceMgr::DK_Error, Msg,
+                    N.getSourceRange());
+  }
+
+  // Report a "soft" error that could be caused by e.g. version skew.
+  void warning(const llvm::Twine &Msg, const Node &N) {
+    SM.PrintMessage(N.getSourceRange().Start, llvm::SourceMgr::DK_Warning, Msg,
+                    N.getSourceRange());
+  }
+};
+
+} // namespace
+
+std::vector<Fragment> Fragment::parseYAML(llvm::StringRef YAML,
+                                          llvm::StringRef BufferName,
+                                          DiagnosticCallback Diags) {
+  // The YAML document may contain multiple conditional fragments.
+  // The SourceManager is shared for all of them.
+  auto SM = std::make_shared<llvm::SourceMgr>();
+  auto Buf = llvm::MemoryBuffer::getMemBufferCopy(YAML, BufferName);
+  // Adapt DiagnosticCallback to function-pointer interface.
+  // Callback receives both errors we emit and those from the YAML parser.
+  SM->setDiagHandler(
+      [](const llvm::SMDiagnostic &Diag, void *Ctx) {
+        (*reinterpret_cast<DiagnosticCallback *>(Ctx))(Diag);
+      },
+      &Diags);
+  std::vector<Fragment> Result;
+  for (auto &Doc : llvm::yaml::Stream(*Buf, *SM)) {
+    if (Node *N = Doc.parseBlockNode()) {
+      Fragment Fragment;
+      Fragment.Source.Manager = SM;
+      Fragment.Source.Location = N->getSourceRange().Start;
+      if (Parser(*SM).parse(Fragment, *N))
+        Result.push_back(std::move(Fragment));
+    }
+  }
+  // Hack: stash the buffer in the SourceMgr to keep it alive.
+  // SM has two entries: "main" non-owning buffer, and ignored owning buffer.
+  SM->AddNewSourceBuffer(std::move(Buf), llvm::SMLoc());
+  return Result;
+}
+
+} // namespace config
+} // namespace clangd
+} // namespace clang

diff  --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
index 03e09669b13f..7cfe37938654 100644
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -41,6 +41,7 @@ add_unittest(ClangdUnitTests ClangdTests
   CollectMacrosTests.cpp
   CompileCommandsTests.cpp
   CompilerTests.cpp
+  ConfigYAMLTests.cpp
   DexTests.cpp
   DiagnosticsTests.cpp
   DraftStoreTests.cpp

diff  --git a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
new file mode 100644
index 000000000000..db94e7b8ac56
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
@@ -0,0 +1,160 @@
+//===-- ConfigYAMLTests.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Annotations.h"
+#include "ConfigFragment.h"
+#include "Matchers.h"
+#include "Protocol.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "gtest/internal/gtest-internal.h"
+
+namespace clang {
+namespace clangd {
+namespace config {
+template <typename T> void PrintTo(const Located<T> &V, std::ostream *OS) {
+  *OS << ::testing::PrintToString(*V);
+}
+
+namespace {
+using ::testing::AllOf;
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+MATCHER_P(Val, Value, "") {
+  if (*arg == Value)
+    return true;
+  *result_listener << "value is " << *arg;
+  return false;
+}
+
+Position toPosition(llvm::SMLoc L, const llvm::SourceMgr &SM) {
+  auto LineCol = SM.getLineAndColumn(L);
+  Position P;
+  P.line = LineCol.first - 1;
+  P.character = LineCol.second - 1;
+  return P;
+}
+
+Range toRange(llvm::SMRange R, const llvm::SourceMgr &SM) {
+  return Range{toPosition(R.Start, SM), toPosition(R.End, SM)};
+}
+
+struct CapturedDiags {
+  std::function<void(const llvm::SMDiagnostic &)> callback() {
+    return [this](const llvm::SMDiagnostic &D) {
+      Diagnostics.emplace_back();
+      Diag &Out = Diagnostics.back();
+      Out.Message = D.getMessage().str();
+      Out.Kind = D.getKind();
+      Out.Pos.line = D.getLineNo() - 1;
+      Out.Pos.character = D.getColumnNo(); // Zero-based - bug in SourceMgr?
+      if (!D.getRanges().empty()) {
+        const auto &R = D.getRanges().front();
+        Out.Range.emplace();
+        Out.Range->start.line = Out.Range->end.line = Out.Pos.line;
+        Out.Range->start.character = R.first;
+        Out.Range->end.character = R.second;
+      }
+    };
+  }
+  struct Diag {
+    std::string Message;
+    llvm::SourceMgr::DiagKind Kind;
+    Position Pos;
+    llvm::Optional<Range> Range;
+
+    friend void PrintTo(const Diag &D, std::ostream *OS) {
+      *OS << (D.Kind == llvm::SourceMgr::DK_Error ? "error: " : "warning: ")
+          << D.Message << "@" << llvm::to_string(D.Pos);
+    }
+  };
+  std::vector<Diag> Diagnostics;
+};
+
+MATCHER_P(DiagMessage, M, "") { return arg.Message == M; }
+MATCHER_P(DiagKind, K, "") { return arg.Kind == K; }
+MATCHER_P(DiagPos, P, "") { return arg.Pos == P; }
+MATCHER_P(DiagRange, R, "") { return arg.Range == R; }
+
+TEST(ParseYAML, SyntacticForms) {
+  CapturedDiags Diags;
+  const char *YAML = R"yaml(
+If:
+  PathMatch:
+    - 'abc'
+CompileFlags: { Add: [foo, bar] }
+---
+CompileFlags:
+  Add: |
+    b
+    az
+  )yaml";
+  auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback());
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 2u);
+  EXPECT_FALSE(Results.front().Condition.HasUnrecognizedCondition);
+  EXPECT_THAT(Results.front().Condition.PathMatch, ElementsAre(Val("abc")));
+  EXPECT_THAT(Results.front().CompileFlags.Add,
+              ElementsAre(Val("foo"), Val("bar")));
+
+  EXPECT_THAT(Results.back().CompileFlags.Add, ElementsAre(Val("b\naz\n")));
+}
+
+TEST(ParseYAML, Locations) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+If:
+  PathMatch: [['???bad***regex(((']]
+  )yaml");
+  auto Results =
+      Fragment::parseYAML(YAML.code(), "config.yaml", Diags.callback());
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 1u);
+  ASSERT_NE(Results.front().Source.Manager, nullptr);
+  EXPECT_EQ(toRange(Results.front().Condition.PathMatch.front().Range,
+                    *Results.front().Source.Manager),
+            YAML.range());
+}
+
+TEST(ParseYAML, Diagnostics) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+If:
+  [[UnknownCondition]]: "foo"
+CompileFlags:
+  Add: 'first'
+---
+CompileFlags: {^
+)yaml");
+  auto Results =
+      Fragment::parseYAML(YAML.code(), "config.yaml", Diags.callback());
+
+  ASSERT_THAT(
+      Diags.Diagnostics,
+      ElementsAre(AllOf(DiagMessage("Unknown Condition key UnknownCondition"),
+                        DiagKind(llvm::SourceMgr::DK_Warning),
+                        DiagPos(YAML.range().start), DiagRange(YAML.range())),
+                  AllOf(DiagMessage("Unexpected token. Expected Key, Flow "
+                                    "Entry, or Flow Mapping End."),
+                        DiagKind(llvm::SourceMgr::DK_Error),
+                        DiagPos(YAML.point()), DiagRange(llvm::None))));
+
+  ASSERT_EQ(Results.size(), 2u);
+  EXPECT_THAT(Results.front().CompileFlags.Add, ElementsAre(Val("first")));
+  EXPECT_TRUE(Results.front().Condition.HasUnrecognizedCondition);
+  EXPECT_THAT(Results.back().CompileFlags.Add, IsEmpty());
+}
+
+} // namespace
+} // namespace config
+} // namespace clangd
+} // namespace clang


        


More information about the cfe-commits mailing list