r248782 - clang-format: Extend #include sorting functionality

Nico Weber via cfe-commits cfe-commits at lists.llvm.org
Sun Oct 18 18:40:53 PDT 2015


On Tue, Sep 29, 2015 at 12:53 AM, Daniel Jasper via cfe-commits <
cfe-commits at lists.llvm.org> wrote:

> Author: djasper
> Date: Tue Sep 29 02:53:08 2015
> New Revision: 248782
>
> URL: http://llvm.org/viewvc/llvm-project?rev=248782&view=rev
> Log:
> clang-format: Extend #include sorting functionality
>
> Recognize the main module header as well as different #include categories.
> This should now mimic the behavior of llvm/utils/sort_includes.py as
> well as clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.cpp very
> closely.
>
> Modified:
>     cfe/trunk/include/clang/Format/Format.h
>     cfe/trunk/lib/Format/Format.cpp
>     cfe/trunk/tools/clang-format/ClangFormat.cpp
>     cfe/trunk/unittests/Format/SortIncludesTest.cpp
>
> Modified: cfe/trunk/include/clang/Format/Format.h
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Format/Format.h?rev=248782&r1=248781&r2=248782&view=diff
>
> ==============================================================================
> --- cfe/trunk/include/clang/Format/Format.h (original)
> +++ cfe/trunk/include/clang/Format/Format.h Tue Sep 29 02:53:08 2015
> @@ -259,6 +259,21 @@ struct FormatStyle {
>    /// For example: BOOST_FOREACH.
>    std::vector<std::string> ForEachMacros;
>
> +  /// \brief Regular expressions denoting the different #include
> categories used
> +  /// for ordering #includes.
> +  ///
> +  /// These regular expressions are matched against the filename of an
> include
> +  /// (including the <> or "") in order. The value belonging to the first
> +  /// matching regular expression is assigned and #includes are sorted
> first
> +  /// according to increasing category number and then alphabetically
> within
> +  /// each category.
> +  ///
> +  /// If none of the regular expressions match, UINT_MAX is assigned as
> +  /// category. The main header for a source file automatically gets
> category 0,
> +  /// so that it is kept at the beginning of the #includes
> +  /// (http://llvm.org/docs/CodingStandards.html#include-style).
> +  std::vector<std::pair<std::string, unsigned>> IncludeCategories;
> +
>    /// \brief Indent case labels one level from the switch statement.
>    ///
>    /// When \c false, use the same indentation level as for the switch
> statement.
>
> Modified: cfe/trunk/lib/Format/Format.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=248782&r1=248781&r2=248782&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Format/Format.cpp (original)
> +++ cfe/trunk/lib/Format/Format.cpp Tue Sep 29 02:53:08 2015
> @@ -13,6 +13,7 @@
>  ///
>
>  //===----------------------------------------------------------------------===//
>
> +#include "clang/Format/Format.h"
>  #include "ContinuationIndenter.h"
>  #include "TokenAnnotator.h"
>  #include "UnwrappedLineFormatter.h"
> @@ -21,7 +22,6 @@
>  #include "clang/Basic/Diagnostic.h"
>  #include "clang/Basic/DiagnosticOptions.h"
>  #include "clang/Basic/SourceManager.h"
> -#include "clang/Format/Format.h"
>  #include "clang/Lex/Lexer.h"
>  #include "llvm/ADT/STLExtras.h"
>  #include "llvm/Support/Allocator.h"
> @@ -375,6 +375,9 @@ FormatStyle getLLVMStyle() {
>    LLVMStyle.ForEachMacros.push_back("foreach");
>    LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
>    LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
> +  LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
> +                                 {"^(<|\"(gtest|isl|json)/)", 3},
> +                                 {".*", 1}};
>    LLVMStyle.IndentCaseLabels = false;
>    LLVMStyle.IndentWrappedFunctionNames = false;
>    LLVMStyle.IndentWidth = 2;
> @@ -423,6 +426,7 @@ FormatStyle getGoogleStyle(FormatStyle::
>    GoogleStyle.AlwaysBreakTemplateDeclarations = true;
>    GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
>    GoogleStyle.DerivePointerAlignment = true;
> +  GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*",
> 3}};
>    GoogleStyle.IndentCaseLabels = true;
>    GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
>    GoogleStyle.ObjCSpaceAfterProperty = false;
> @@ -1575,7 +1579,7 @@ struct IncludeDirective {
>    StringRef Filename;
>    StringRef Text;
>    unsigned Offset;
> -  bool IsAngled;
> +  unsigned Category;
>  };
>
>  } // end anonymous namespace
> @@ -1605,7 +1609,8 @@ static void sortIncludes(const FormatSty
>    for (unsigned i = 0, e = Includes.size(); i != e; ++i)
>      Indices.push_back(i);
>    std::sort(Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned
> RHSI) {
> -    return Includes[LHSI].Filename < Includes[RHSI].Filename;
> +    return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
> +           std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
>    });
>
>    // If the #includes are out of order, we generate a single replacement
> fixing
> @@ -1642,22 +1647,49 @@ tooling::Replacements sortIncludes(const
>    tooling::Replacements Replaces;
>    unsigned Prev = 0;
>    unsigned SearchFrom = 0;
> -  llvm::Regex IncludeRegex(R"(^[\t\ ]*#[\t\
> ]*include[^"<]*["<]([^">]*)([">]))");
> +  llvm::Regex IncludeRegex(
> +      R"(^[\t\ ]*#[\t\ ]*include[^"<]*(["<][^">]*[">]))");
>    SmallVector<StringRef, 4> Matches;
>    SmallVector<IncludeDirective, 16> IncludesInBlock;
> +
> +  // In compiled files, consider the first #include to be the main
> #include of
> +  // the file if it is not a system #include. This ensures that the header
> +  // doesn't have hidden dependencies
> +  // (http://llvm.org/docs/CodingStandards.html#include-style).
> +  //
> +  // FIXME: Do some sanity checking, e.g. edit distance of the base name,
> to fix
> +  // cases where the first #include is unlikely to be the main header.
>

FYI: This happens to make include sorting in webkit cpp files work too,
where the style guide says that one must include config.h first and the
main header second (https://www.webkit.org/coding/coding-style.html). Since
clang-format won't reorder the first line, it keeps the config.h header
first, even though config.h isn't technically the main header. If this
FIXME gets fixed, it'd be good if formatting of webkit cpp files isn't
broke in the process.


> +  bool LookForMainHeader = FileName.endswith(".c") ||
> +                           FileName.endswith(".cc") ||
> +                           FileName.endswith(".cpp")||
> +                           FileName.endswith(".c++")||
> +                           FileName.endswith(".cxx");
> +
> +  // Create pre-compiled regular expressions for the #include categories.
> +  SmallVector<llvm::Regex, 4> CategoryRegexs;
> +  for (const auto &IncludeBlock : Style.IncludeCategories)
> +    CategoryRegexs.emplace_back(IncludeBlock.first);
> +
>    for (;;) {
>      auto Pos = Code.find('\n', SearchFrom);
>      StringRef Line =
>          Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) -
> Prev);
>      if (!Line.endswith("\\")) {
>        if (IncludeRegex.match(Line, &Matches)) {
> -        bool IsAngled = Matches[2] == ">";
> -        if (!IncludesInBlock.empty() &&
> -            IsAngled != IncludesInBlock.back().IsAngled) {
> -          sortIncludes(Style, IncludesInBlock, Ranges, FileName,
> Replaces);
> -          IncludesInBlock.clear();
> +        unsigned Category;
> +        if (LookForMainHeader && !Matches[1].startswith("<")) {
> +          Category = 0;
> +        } else {
> +          Category = UINT_MAX;
> +          for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) {
> +            if (CategoryRegexs[i].match(Matches[1])) {
> +              Category = Style.IncludeCategories[i].second;
> +              break;
> +            }
> +          }
>          }
> -        IncludesInBlock.push_back({Matches[1], Line, Prev, Matches[2] ==
> ">"});
> +        LookForMainHeader = false;
> +        IncludesInBlock.push_back({Matches[1], Line, Prev, Category});
>        } else if (!IncludesInBlock.empty()) {
>          sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces);
>          IncludesInBlock.clear();
>
> Modified: cfe/trunk/tools/clang-format/ClangFormat.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-format/ClangFormat.cpp?rev=248782&r1=248781&r2=248782&view=diff
>
> ==============================================================================
> --- cfe/trunk/tools/clang-format/ClangFormat.cpp (original)
> +++ cfe/trunk/tools/clang-format/ClangFormat.cpp Tue Sep 29 02:53:08 2015
> @@ -73,7 +73,7 @@ FallbackStyle("fallback-style",
>                cl::init("LLVM"), cl::cat(ClangFormatCategory));
>
>  static cl::opt<std::string>
> -AssumeFilename("assume-filename",
> +AssumeFileName("assume-filename",
>                 cl::desc("When reading from stdin, clang-format assumes
> this\n"
>                          "filename to look for a style config file (with\n"
>                          "-style=file) and to determine the language."),
> @@ -239,13 +239,13 @@ static bool format(StringRef FileName) {
>    std::vector<tooling::Range> Ranges;
>    if (fillRanges(Code.get(), Ranges))
>      return true;
> -  FormatStyle FormatStyle = getStyle(
> -      Style, (FileName == "-") ? AssumeFilename : FileName,
> FallbackStyle);
> +  StringRef AssumedFileName = (FileName == "-") ? AssumeFileName :
> FileName;
> +  FormatStyle FormatStyle = getStyle(Style, AssumedFileName,
> FallbackStyle);
>    Replacements Replaces;
>    std::string ChangedCode;
>    if (SortIncludes) {
>      Replaces =
> -        sortIncludes(FormatStyle, Code->getBuffer(), Ranges, FileName);
> +        sortIncludes(FormatStyle, Code->getBuffer(), Ranges,
> AssumedFileName);
>      ChangedCode = tooling::applyAllReplacements(Code->getBuffer(),
> Replaces);
>      for (const auto &R : Replaces)
>        Ranges.push_back({R.getOffset(), R.getLength()});
> @@ -324,7 +324,7 @@ int main(int argc, const char **argv) {
>    if (DumpConfig) {
>      std::string Config =
>          clang::format::configurationAsText(clang::format::getStyle(
> -            Style, FileNames.empty() ? AssumeFilename : FileNames[0],
> +            Style, FileNames.empty() ? AssumeFileName : FileNames[0],
>              FallbackStyle));
>      llvm::outs() << Config << "\n";
>      return 0;
>
> Modified: cfe/trunk/unittests/Format/SortIncludesTest.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/SortIncludesTest.cpp?rev=248782&r1=248781&r2=248782&view=diff
>
> ==============================================================================
> --- cfe/trunk/unittests/Format/SortIncludesTest.cpp (original)
> +++ cfe/trunk/unittests/Format/SortIncludesTest.cpp Tue Sep 29 02:53:08
> 2015
> @@ -20,13 +20,15 @@ namespace {
>
>  class SortIncludesTest : public ::testing::Test {
>  protected:
> -  std::string sort(llvm::StringRef Code) {
> +  std::string sort(llvm::StringRef Code, StringRef FileName =
> "input.cpp") {
>      std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
> -    std::string Sorted = applyAllReplacements(
> -        Code, sortIncludes(getLLVMStyle(), Code, Ranges, "input.cpp"));
> -    return applyAllReplacements(
> -        Sorted, reformat(getLLVMStyle(), Sorted, Ranges, "input.cpp"));
> +    std::string Sorted =
> +        applyAllReplacements(Code, sortIncludes(Style, Code, Ranges,
> FileName));
> +    return applyAllReplacements(Sorted,
> +                                reformat(Style, Sorted, Ranges,
> FileName));
>    }
> +
> +  FormatStyle Style = getLLVMStyle();
>  };
>
>  TEST_F(SortIncludesTest, BasicSorting) {
> @@ -76,13 +78,23 @@ TEST_F(SortIncludesTest, SortsLocallyInE
>              "#include \"c.h\"\n"
>              "\n"
>              "#include \"b.h\"\n",
> -            sort("#include \"c.h\"\n"
> -                 "#include \"a.h\"\n"
> +            sort("#include \"a.h\"\n"
> +                 "#include \"c.h\"\n"
>                   "\n"
>                   "#include \"b.h\"\n"));
>  }
>
>  TEST_F(SortIncludesTest, HandlesAngledIncludesAsSeparateBlocks) {
> +  EXPECT_EQ("#include \"a.h\"\n"
> +            "#include \"c.h\"\n"
> +            "#include <b.h>\n"
> +            "#include <d.h>\n",
> +            sort("#include <d.h>\n"
> +                 "#include <b.h>\n"
> +                 "#include \"c.h\"\n"
> +                 "#include \"a.h\"\n"));
> +
> +  Style = getGoogleStyle(FormatStyle::LK_Cpp);
>    EXPECT_EQ("#include <b.h>\n"
>              "#include <d.h>\n"
>              "#include \"a.h\"\n"
> @@ -103,6 +115,24 @@ TEST_F(SortIncludesTest, HandlesMultilin
>                   "#include \"b.h\"\n"));
>  }
>
> +TEST_F(SortIncludesTest, LeavesMainHeaderFirst) {
> +  EXPECT_EQ("#include \"llvm/a.h\"\n"
> +            "#include \"b.h\"\n"
> +            "#include \"c.h\"\n",
> +            sort("#include \"llvm/a.h\"\n"
> +                 "#include \"c.h\"\n"
> +                 "#include \"b.h\"\n"));
> +
> +  // Don't do this in headers.
> +  EXPECT_EQ("#include \"b.h\"\n"
> +            "#include \"c.h\"\n"
> +            "#include \"llvm/a.h\"\n",
> +            sort("#include \"llvm/a.h\"\n"
> +                 "#include \"c.h\"\n"
> +                 "#include \"b.h\"\n",
> +                 "some_header.h"));
> +}
> +
>  } // end namespace
>  } // end namespace format
>  } // end namespace clang
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20151018/bd1923c7/attachment-0001.html>


More information about the cfe-commits mailing list