[clang-tools-extra] r265303 - [clang-tidy] Reduce false-positive ratio in misc-suspicious-missing-comma check.

Etienne Bergeron via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 4 08:46:38 PDT 2016


Author: etienneb
Date: Mon Apr  4 10:46:38 2016
New Revision: 265303

URL: http://llvm.org/viewvc/llvm-project?rev=265303&view=rev
Log:
[clang-tidy] Reduce false-positive ratio in misc-suspicious-missing-comma check.

Summary:
This patch is adding detection of common string literal patterns
that should not trigger warnings.

  [*] Add a limit on the number of concatenated token,
  [*] Add support for parenthese sequence of tokens,
  [*] Add detection of valid indentation.

As an example, this code will no longer trigger a warning:
```
const char* Array[] = {
  "first literal"
    "indented literal"
    "indented literal",
  "second literal",
  [...]
```

Reviewers: alexfh

Subscribers: cfe-commits

Differential Revision: http://reviews.llvm.org/D18695

Modified:
    clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.cpp
    clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.h
    clang-tools-extra/trunk/test/clang-tidy/misc-suspicious-missing-comma.cpp

Modified: clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.cpp?rev=265303&r1=265302&r2=265303&view=diff
==============================================================================
--- clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.cpp (original)
+++ clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.cpp Mon Apr  4 10:46:38 2016
@@ -19,8 +19,51 @@ namespace misc {
 
 namespace {
 
-AST_MATCHER(StringLiteral, isConcatenatedLiteral) {
-  return Node.getNumConcatenated() > 1;
+bool isConcatenatedLiteralsOnPurpose(ASTContext* Ctx,
+                                     const StringLiteral* Lit) {
+  // String literals surrounded by parentheses are assumed to be on purpose.
+  //    i.e.:  const char* Array[] = { ("a" "b" "c"), "d", [...] };
+  auto Parents = Ctx->getParents(*Lit);
+  if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr)
+    return true;
+
+  // Appropriately indented string literals are assumed to be on purpose.
+  // The following frequent indentation is accepted:
+  //     const char* Array[] = {
+  //       "first literal"
+  //           "indented literal"
+  //           "indented literal",
+  //       "second literal",
+  //       [...]
+  //     };
+  const SourceManager& SM = Ctx->getSourceManager();
+  bool IndentedCorrectly = true;
+  SourceLocation FirstToken = Lit->getStrTokenLoc(0);
+  FileID BaseFID = SM.getFileID(FirstToken);
+  unsigned int BaseIndent = SM.getSpellingColumnNumber(FirstToken);
+  unsigned int BaseLine = SM.getSpellingLineNumber(FirstToken);
+  for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++ TokNum) {
+    SourceLocation Token = Lit->getStrTokenLoc(TokNum);
+    FileID FID = SM.getFileID(Token);
+    unsigned int Indent = SM.getSpellingColumnNumber(Token);
+    unsigned int Line = SM.getSpellingLineNumber(Token);
+    if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) {
+      IndentedCorrectly = false;
+      break;
+    }
+  }
+  if (IndentedCorrectly)
+    return true;
+
+  // There is no pattern recognized by the checker, assume it's not on purpose.
+  return false;
+}
+
+AST_MATCHER_P(StringLiteral, isConcatenatedLiteral,
+              unsigned, MaxConcatenatedTokens) {
+  return Node.getNumConcatenated() > 1 &&
+         Node.getNumConcatenated() < MaxConcatenatedTokens &&
+         !isConcatenatedLiteralsOnPurpose(&Finder->getASTContext(), &Node);
 }
 
 }  // namespace
@@ -29,17 +72,19 @@ SuspiciousMissingCommaCheck::SuspiciousM
     StringRef Name, ClangTidyContext *Context)
     : ClangTidyCheck(Name, Context),
       SizeThreshold(Options.get("SizeThreshold", 5U)),
-      RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))) {}
+      RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))),
+      MaxConcatenatedTokens(Options.get("MaxConcatenatedTokens", 5U)) {}
 
 void SuspiciousMissingCommaCheck::storeOptions(
     ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "SizeThreshold", SizeThreshold);
   Options.store(Opts, "RatioThreshold", std::to_string(RatioThreshold));
+  Options.store(Opts, "MaxConcatenatedTokens", MaxConcatenatedTokens);
 }
 
 void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
   const auto ConcatenatedStringLiteral =
-      stringLiteral(isConcatenatedLiteral()).bind("str");
+      stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind("str");
 
   const auto StringsInitializerList =
       initListExpr(hasType(constantArrayType()),
@@ -51,9 +96,10 @@ void SuspiciousMissingCommaCheck::regist
 void SuspiciousMissingCommaCheck::check(
     const MatchFinder::MatchResult &Result) {
   const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>("list");
-  const auto *ConcatenatedLiteral = Result.Nodes.getNodeAs<Expr>("str");
+  const auto *ConcatenatedLiteral =
+      Result.Nodes.getNodeAs<StringLiteral>("str");
   assert(InitializerList && ConcatenatedLiteral);
-
+  
   // Skip small arrays as they often generate false-positive.
   unsigned int Size = InitializerList->getNumInits();
   if (Size < SizeThreshold) return;

Modified: clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.h?rev=265303&r1=265302&r2=265303&view=diff
==============================================================================
--- clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.h (original)
+++ clang-tools-extra/trunk/clang-tidy/misc/SuspiciousMissingCommaCheck.h Mon Apr  4 10:46:38 2016
@@ -32,6 +32,8 @@ private:
   const unsigned SizeThreshold;
   // Maximal threshold ratio of suspicious string literals to be considered.
   const double RatioThreshold;
+  // Maximal number of concatenated tokens.
+  const unsigned MaxConcatenatedTokens;
 };
 
 } // namespace misc

Modified: clang-tools-extra/trunk/test/clang-tidy/misc-suspicious-missing-comma.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/test/clang-tidy/misc-suspicious-missing-comma.cpp?rev=265303&r1=265302&r2=265303&view=diff
==============================================================================
--- clang-tools-extra/trunk/test/clang-tidy/misc-suspicious-missing-comma.cpp (original)
+++ clang-tools-extra/trunk/test/clang-tidy/misc-suspicious-missing-comma.cpp Mon Apr  4 10:46:38 2016
@@ -15,7 +15,8 @@ const wchar_t* Colors[] = {
   L"Red", L"Yellow", L"Blue", L"Green", L"Purple", L"Rose", L"White", L"Black"
 };
 
-// The following array should not trigger any warnings.
+// The following array should not trigger any warnings. There is more than 5
+// elements, but they are all concatenated string literals.
 const char* HttpCommands[] = {
   "GET / HTTP/1.0\r\n"
   "\r\n",
@@ -26,9 +27,56 @@ const char* HttpCommands[] = {
   "GET /favicon.ico HTTP/1.0\r\n"
   "header: dummy"
   "\r\n",
+
+  "GET /index.html-en HTTP/1.0\r\n"
+  "\r\n",
+
+  "GET /index.html-fr HTTP/1.0\r\n"
+  "\r\n",
+
+  "GET /index.html-es HTTP/1.0\r\n"
+  "\r\n",
 };
 
 // This array is too small to trigger a warning.
 const char* SmallArray[] = {
   "a" "b", "c"
 };
+
+// Parentheses should be enough to avoid warnings.
+const char* ParentheseArray[] = {
+  ("a" "b"), "c",
+  ("d"
+   "e"
+   "f"),
+  "g", "h", "i", "j", "k", "l"
+};
+
+// Indentation should be enough to avoid warnings.
+const char* CorrectlyIndentedArray[] = {
+  "This is a long message "
+      "which is spanning over multiple lines."
+      "And this should be fine.",
+  "a", "b", "c", "d", "e", "f",
+  "g", "h", "i", "j", "k", "l"
+};
+
+const char* IncorrectlyIndentedArray[] = {
+  "This is a long message "
+  "which is spanning over multiple lines."
+      "And this should be fine.",
+  "a", "b", "c", "d", "e", "f",
+  "g", "h", "i", "j", "k", "l"
+};
+// CHECK-MESSAGES: :[[@LINE-6]]:3: warning: suspicious string literal, probably missing a comma [misc-suspicious-missing-comma]
+
+const char* TooManyConcatenatedTokensArray[] = {
+  "Dummy line",
+  "Dummy line",
+  "a" "b" "c" "d" "e" "f",
+  "g" "h" "i" "j" "k" "l",
+  "Dummy line",
+  "Dummy line",
+  "Dummy line",
+  "Dummy line",
+};




More information about the cfe-commits mailing list